#importing packages
import pandas as pd # data preprocessing
import numpy as np # linear algebra
import matplotlib.pyplot as plt #Basic plots visualisations
%matplotlib inline
import seaborn as sns #for prettier plots
import os
import pickle
import warnings
warnings.filterwarnings('ignore')
plt.style.use('fivethirtyeight')
plt.figure(figsize=(8,6))
pd.set_option('display.max_columns' , 300)
pd.set_option('display.max_rows' , 300)
<Figure size 576x432 with 0 Axes>
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor , AdaBoostRegressor , GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from xgboost.sklearn import XGBRegressor
import os
os.getcwd()
'C:\\Users\\Hp\\Desktop\\Projects\\task\\russian housing market'
train_data=pd.read_csv("train_data.csv")
train_data.shape
(30471, 292)
def summary(train_data):
print('Shape of data :' ,train_data.shape )
return(pd.DataFrame({"Datatype":train_data.dtypes ,
"NAs":train_data.isnull().sum() ,
"Uniques":train_data.nunique(),
"levels": [train_data[x].unique() for x in train_data.columns]}))
summary(train_data)
Shape of data : (30471, 292)
| Datatype | NAs | Uniques | levels | |
|---|---|---|---|---|
| id | int64 | 0 | 30471 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... |
| timestamp | object | 0 | 1161 | [2011-08-20, 2011-08-23, 2011-08-27, 2011-09-0... |
| full_sq | int64 | 0 | 211 | [43, 34, 89, 77, 67, 25, 44, 42, 36, 38, 31, 5... |
| life_sq | float64 | 6383 | 175 | [27.0, 19.0, 29.0, 50.0, 77.0, 46.0, 14.0, 44.... |
| floor | float64 | 167 | 41 | [4.0, 3.0, 2.0, 9.0, 14.0, 10.0, 5.0, 12.0, 11... |
| max_floor | float64 | 9572 | 49 | [nan, 17.0, 5.0, 22.0, 16.0, 9.0, 8.0, 0.0, 24... |
| material | float64 | 9572 | 6 | [nan, 1.0, 2.0, 4.0, 6.0, 5.0, 3.0] |
| build_year | float64 | 13605 | 119 | [nan, 1907.0, 1980.0, 2014.0, 1970.0, 1982.0, ... |
| num_room | float64 | 9572 | 13 | [nan, 2.0, 1.0, 3.0, 4.0, 5.0, 6.0, 0.0, 19.0,... |
| kitch_sq | float64 | 9572 | 74 | [nan, 11.0, 12.0, 0.0, 8.0, 1.0, 6.0, 10.0, 9.... |
| state | float64 | 13559 | 5 | [nan, 3.0, 1.0, 2.0, 4.0, 33.0] |
| product_type | object | 0 | 2 | [Investment, OwnerOccupier] |
| sub_area | object | 0 | 146 | [Bibirevo, Nagatinskij Zaton, Tekstil'shhiki, ... |
| area_m | float64 | 0 | 146 | [6407578.1, 9589336.912, 4808269.831, 12583535... |
| raion_popul | int64 | 0 | 146 | [155572, 115352, 101708, 178473, 108171, 43795... |
| green_zone_part | float64 | 0 | 146 | [0.189727117, 0.37260204399999997, 0.112559644... |
| indust_part | float64 | 0 | 132 | [6.99893e-05, 0.049637257000000004, 0.11853738... |
| children_preschool | int64 | 0 | 146 | [9576, 6880, 5879, 13087, 5706, 2418, 2459, 65... |
| preschool_quota | float64 | 6688 | 121 | [5001.0, 3119.0, 1463.0, 6839.0, 3240.0, 852.0... |
| preschool_education_centers_raion | int64 | 0 | 13 | [5, 4, 9, 7, 2, 3, 13, 8, 6, 1, 10, 11, 0] |
| children_school | int64 | 0 | 146 | [10309, 7759, 6207, 13670, 6748, 2514, 2810, 6... |
| school_quota | float64 | 6685 | 125 | [11065.0, 6237.0, 5580.0, 17063.0, 7770.0, 201... |
| school_education_centers_raion | int64 | 0 | 14 | [5, 8, 7, 10, 9, 3, 6, 4, 14, 1, 13, 11, 2, 0] |
| school_education_centers_top_20_raion | int64 | 0 | 3 | [0, 1, 2] |
| hospital_beds_raion | float64 | 14441 | 79 | [240.0, 229.0, 1183.0, nan, 562.0, 4849.0, 189... |
| healthcare_centers_raion | int64 | 0 | 7 | [1, 4, 0, 3, 2, 5, 6] |
| university_top_20_raion | int64 | 0 | 4 | [0, 2, 1, 3] |
| sport_objects_raion | int64 | 0 | 24 | [7, 6, 5, 17, 25, 4, 3, 29, 12, 16, 2, 0, 10, ... |
| additional_education_raion | int64 | 0 | 12 | [3, 1, 6, 2, 0, 16, 8, 4, 5, 11, 10, 7] |
| culture_objects_top_25 | object | 0 | 2 | [no, yes] |
| culture_objects_top_25_raion | int64 | 0 | 6 | [0, 1, 3, 2, 4, 10] |
| shopping_centers_raion | int64 | 0 | 16 | [16, 3, 0, 11, 10, 6, 5, 7, 15, 2, 1, 9, 4, 8,... |
| office_raion | int64 | 0 | 30 | [1, 0, 4, 93, 19, 9, 7, 3, 84, 14, 2, 6, 5, 48... |
| thermal_power_plant_raion | object | 0 | 2 | [no, yes] |
| incineration_raion | object | 0 | 2 | [no, yes] |
| oil_chemistry_raion | object | 0 | 2 | [no, yes] |
| radiation_raion | object | 0 | 2 | [no, yes] |
| railroad_terminal_raion | object | 0 | 2 | [no, yes] |
| big_market_raion | object | 0 | 2 | [no, yes] |
| nuclear_reactor_raion | object | 0 | 2 | [no, yes] |
| detention_facility_raion | object | 0 | 2 | [no, yes] |
| full_all | int64 | 0 | 146 | [86206, 76284, 101982, 21155, 28179, 19940, 85... |
| male_f | int64 | 0 | 146 | [40477, 34200, 46076, 9828, 13522, 9400, 40724... |
| female_f | int64 | 0 | 146 | [45729, 42084, 55906, 11327, 14657, 10540, 452... |
| young_all | int64 | 0 | 146 | [21154, 15727, 13028, 28563, 13368, 5291, 5682... |
| young_male | int64 | 0 | 145 | [11007, 7925, 6835, 14680, 7159, 2744, 2925, 7... |
| young_female | int64 | 0 | 145 | [10147, 7802, 6193, 13883, 6209, 2547, 2757, 6... |
| work_all | int64 | 0 | 145 | [98207, 70194, 63388, 120381, 68043, 29660, 35... |
| work_male | int64 | 0 | 145 | [52277, 35622, 31813, 60040, 34236, 15793, 174... |
| work_female | int64 | 0 | 146 | [45930, 34572, 31575, 60341, 33807, 13867, 175... |
| ekder_all | int64 | 0 | 146 | [36211, 29431, 25292, 29529, 26760, 8844, 1672... |
| ekder_male | int64 | 0 | 146 | [10580, 9266, 7609, 9083, 8563, 2608, 5351, 69... |
| ekder_female | int64 | 0 | 146 | [25631, 20165, 17683, 20446, 18197, 6236, 1136... |
| 0_6_all | int64 | 0 | 146 | [9576, 6880, 5879, 13087, 5706, 2418, 2459, 65... |
| 0_6_male | int64 | 0 | 144 | [4899, 3466, 3095, 6645, 2982, 1224, 1241, 345... |
| 0_6_female | int64 | 0 | 145 | [4677, 3414, 2784, 6442, 2724, 1194, 1218, 305... |
| 7_14_all | int64 | 0 | 146 | [10309, 7759, 6207, 13670, 6748, 2514, 2810, 6... |
| 7_14_male | int64 | 0 | 142 | [5463, 3909, 3269, 7126, 3664, 1328, 1472, 345... |
| 7_14_female | int64 | 0 | 145 | [4846, 3850, 2938, 6544, 3084, 1186, 1338, 311... |
| 0_17_all | int64 | 0 | 145 | [23603, 17700, 14884, 32063, 15237, 5866, 6510... |
| 0_17_male | int64 | 0 | 146 | [12286, 8998, 7821, 16513, 8113, 3035, 3345, 8... |
| 0_17_female | int64 | 0 | 146 | [11317, 8702, 7063, 15550, 7124, 2831, 3165, 7... |
| 16_29_all | int64 | 0 | 145 | [17508, 15164, 19401, 3292, 5164, 4851, 19445,... |
| 16_29_male | int64 | 0 | 145 | [9425, 7571, 9045, 1450, 2583, 2329, 10085, 84... |
| 16_29_female | int64 | 0 | 146 | [8083, 7593, 10356, 1842, 2581, 2522, 9360, 91... |
| 0_13_all | int64 | 0 | 146 | [18654, 13729, 11252, 24934, 11631, 4632, 4884... |
| 0_13_male | int64 | 0 | 144 | [9709, 6929, 5916, 12782, 6223, 2399, 2507, 64... |
| 0_13_female | int64 | 0 | 146 | [8945, 6800, 5336, 12152, 5408, 2233, 2377, 57... |
| raion_build_count_with_material_info | float64 | 4991 | 112 | [211.0, 245.0, 330.0, 458.0, 746.0, 188.0, 217... |
| build_count_block | float64 | 4991 | 76 | [25.0, 83.0, 59.0, 9.0, 48.0, 24.0, 23.0, 101.... |
| build_count_wood | float64 | 4991 | 34 | [0.0, 1.0, 51.0, 2.0, 204.0, 793.0, 11.0, 6.0,... |
| build_count_frame | float64 | 4991 | 21 | [0.0, 12.0, 14.0, 36.0, 1.0, 97.0, 4.0, 83.0, ... |
| build_count_brick | float64 | 4991 | 101 | [0.0, 67.0, 206.0, 124.0, 643.0, 147.0, 139.0,... |
| build_count_monolith | float64 | 4991 | 33 | [2.0, 4.0, 50.0, 16.0, 12.0, 11.0, 21.0, 14.0,... |
| build_count_panel | float64 | 4991 | 91 | [184.0, 90.0, 60.0, 201.0, 35.0, 15.0, 41.0, 1... |
| build_count_foam | float64 | 4991 | 4 | [0.0, 2.0, 1.0, 11.0, nan] |
| build_count_slag | float64 | 4991 | 21 | [0.0, 1.0, 9.0, 3.0, 10.0, 64.0, 2.0, 41.0, 12... |
| build_count_mix | float64 | 4991 | 9 | [0.0, 2.0, 1.0, 9.0, 6.0, 8.0, 5.0, 3.0, 4.0, ... |
| raion_build_count_with_builddate_info | float64 | 4991 | 114 | [211.0, 244.0, 330.0, 459.0, 746.0, 188.0, 216... |
| build_count_before_1920 | float64 | 4991 | 29 | [0.0, 1.0, 13.0, 371.0, 11.0, 47.0, 298.0, 240... |
| build_count_1921-1945 | float64 | 4991 | 47 | [0.0, 1.0, 24.0, 114.0, 5.0, 38.0, 9.0, 88.0, ... |
| build_count_1946-1970 | float64 | 4991 | 100 | [0.0, 143.0, 246.0, 40.0, 146.0, 152.0, 90.0, ... |
| build_count_1971-1995 | float64 | 4991 | 92 | [206.0, 84.0, 63.0, 130.0, 62.0, 25.0, 58.0, 3... |
| build_count_after_1995 | float64 | 4991 | 69 | [5.0, 15.0, 20.0, 252.0, 53.0, 6.0, 19.0, 51.0... |
| ID_metro | int64 | 0 | 223 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... |
| metro_min_avto | float64 | 0 | 11843 | [2.590241095, 0.936699728, 2.120998901, 1.4890... |
| metro_km_avto | float64 | 0 | 11843 | [1.131259906, 0.647336757, 1.637996285, 0.9845... |
| metro_min_walk | float64 | 25 | 11834 | [13.57511887, 7.620630407999999, 17.3515153999... |
| metro_km_walk | float64 | 25 | 11834 | [1.131259906, 0.635052534, 1.445959617, 0.9638... |
| kindergarten_km | float64 | 0 | 11852 | [0.14569955199999998, 0.147754269, 0.049101535... |
| school_km | float64 | 0 | 11825 | [0.17797535, 0.273345319, 0.158071895, 0.23645... |
| park_km | float64 | 0 | 11852 | [2.158587074, 0.550689737, 0.374847751, 0.0780... |
| green_zone_km | float64 | 0 | 11735 | [0.600973099, 0.065321162, 0.453172405, 0.1061... |
| industrial_km | float64 | 0 | 11723 | [1.080934313, 0.966479097, 0.939275144, 0.4511... |
| water_treatment_km | float64 | 0 | 11828 | [23.68346, 1.317476, 4.91266, 15.62371, 10.683... |
| cemetery_km | float64 | 0 | 11828 | [1.804127, 4.655004, 3.3810830000000003, 2.017... |
| incineration_km | float64 | 0 | 11829 | [3.6333339999999996, 8.648587, 11.99648, 14.31... |
| railroad_station_walk_km | float64 | 25 | 11834 | [5.419893032, 3.4119930839999997, 1.277658039,... |
| railroad_station_walk_min | float64 | 25 | 11834 | [65.03871639, 40.943917, 15.33189647, 51.49719... |
| ID_railroad_station_walk | float64 | 25 | 133 | [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ... |
| railroad_station_avto_km | float64 | 0 | 11843 | [5.419893032, 3.641772591, 1.277658039, 3.8160... |
| railroad_station_avto_min | float64 | 0 | 11843 | [6.905892968, 4.679744508, 1.7014195369999998,... |
| ID_railroad_station_avto | int64 | 0 | 133 | [1, 2, 3, 4, 113, 6, 7, 9, 22, 11, 128, 13, 14... |
| public_transport_station_km | float64 | 0 | 11851 | [0.274985143, 0.065263344, 0.32875604399999997... |
| public_transport_station_min_walk | float64 | 0 | 11852 | [3.2998217139999997, 0.78316013, 3.945072522, ... |
| water_km | float64 | 0 | 11851 | [0.992631058, 0.698081318, 0.468264622, 1.2003... |
| water_1line | object | 0 | 2 | [no, yes] |
| mkad_km | float64 | 0 | 11852 | [1.42239141, 9.503405157000001, 5.60479992, 2.... |
| ttk_km | float64 | 0 | 11852 | [10.9185867, 3.1039959539999997, 2.92748709699... |
| sadovoe_km | float64 | 0 | 11852 | [13.10061764, 6.444333466000001, 6.963402995, ... |
| bulvar_ring_km | float64 | 0 | 11852 | [13.67565705, 8.132640073, 8.054252314, 18.309... |
| kremlin_km | float64 | 0 | 11852 | [15.15621058, 8.698054189, 9.067884956, 19.487... |
| big_road1_km | float64 | 0 | 11852 | [1.422391404, 2.887376585, 0.647249803, 2.6778... |
| ID_big_road1 | int64 | 0 | 48 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... |
| big_road1_1line | object | 0 | 2 | [no, yes] |
| big_road2_km | float64 | 0 | 11852 | [3.830951404, 3.1039959739999996, 2.927487099,... |
| ID_big_road2 | int64 | 0 | 58 | [5, 4, 17, 10, 3, 20, 36, 14, 9, 11, 1, 33, 32... |
| railroad_km | float64 | 0 | 11852 | [1.305159492, 0.694535727, 0.70069112, 1.99926... |
| railroad_1line | object | 0 | 2 | [no, yes] |
| zd_vokzaly_avto_km | float64 | 0 | 11843 | [14.23196091, 9.242585522, 9.540544478, 17.478... |
| ID_railroad_terminal | int64 | 0 | 8 | [101, 32, 5, 83, 113, 97, 121, 50] |
| bus_terminal_avto_km | float64 | 0 | 11843 | [24.2924061, 5.706113234, 6.710302485, 6.73461... |
| ID_bus_terminal | int64 | 0 | 14 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] |
| oil_chemistry_km | float64 | 0 | 11852 | [18.152338, 9.034641872, 5.777393501000001, 27... |
| nuclear_reactor_km | float64 | 0 | 11852 | [5.718518835, 3.4899544430000002, 7.50661249, ... |
| radiation_km | float64 | 0 | 11852 | [1.210027392, 2.72429538, 0.7722161040000001, ... |
| power_transmission_line_km | float64 | 0 | 11852 | [1.0625130459999999, 1.2461487390000001, 1.602... |
| thermal_power_plant_km | float64 | 0 | 11852 | [5.814134663, 3.419574049, 3.682454651, 11.178... |
| ts_km | float64 | 0 | 11849 | [4.308127002, 0.7255604309999999, 3.562187704,... |
| big_market_km | float64 | 0 | 11843 | [10.81417151, 6.910567711000001, 5.75236835, 2... |
| market_shop_km | float64 | 0 | 11843 | [1.6762583130000002, 3.4247160919999997, 1.375... |
| fitness_km | float64 | 0 | 11775 | [0.485841388, 0.6683636789999999, 0.733101062,... |
| swim_pool_km | float64 | 0 | 11843 | [3.0650470989999996, 2.000153804, 1.239303854,... |
| ice_rink_km | float64 | 0 | 11843 | [1.1075942090000002, 8.97282283, 1.97851718699... |
| stadium_km | float64 | 0 | 11843 | [8.148590774, 6.127072782000001, 0.767568769, ... |
| basketball_km | float64 | 0 | 11852 | [3.5165129110000004, 1.161578983, 1.952770629,... |
| hospice_morgue_km | float64 | 0 | 11852 | [2.392353035, 2.543746975, 0.621357002, 3.5495... |
| detention_facility_km | float64 | 0 | 11843 | [4.2480358869999995, 12.64987875, 7.682302975,... |
| public_healthcare_km | float64 | 0 | 11843 | [0.974742843, 1.47772267, 0.097143527, 2.16373... |
| university_km | float64 | 0 | 11843 | [6.715025787, 1.852560245, 0.8412541020000001,... |
| workplaces_km | float64 | 0 | 11844 | [0.8843500209999999, 0.686251693, 1.5100888540... |
| shopping_centers_km | float64 | 0 | 11813 | [0.648487637, 0.519311324, 1.48653302, 0.59991... |
| office_km | float64 | 0 | 11806 | [0.637188832, 0.688796317, 1.5430488359999999,... |
| additional_education_km | float64 | 0 | 11843 | [0.947961657, 1.072315063, 0.391957389, 0.8926... |
| preschool_km | float64 | 0 | 11828 | [0.17797535, 0.273345319, 0.158071895, 0.23645... |
| big_church_km | float64 | 0 | 11852 | [0.625783434, 0.967820571, 3.178751487, 1.0317... |
| church_synagogue_km | float64 | 0 | 11852 | [0.628186549, 0.471446524, 0.755946015, 1.5615... |
| mosque_km | float64 | 0 | 11852 | [3.932040333, 4.841543888, 7.92215157, 15.3004... |
| theater_km | float64 | 0 | 11843 | [14.05304655, 6.829888847, 4.273200485, 16.990... |
| museum_km | float64 | 0 | 11852 | [7.389497904, 0.709260033, 3.156422843, 16.041... |
| exhibition_km | float64 | 0 | 11852 | [7.023704919, 2.358840498, 4.958214283, 5.0296... |
| catering_km | float64 | 0 | 11852 | [0.516838085, 0.23028691, 0.190461977, 0.46582... |
| ecology | object | 0 | 5 | [good, excellent, poor, satisfactory, no data] |
| green_part_500 | float64 | 0 | 3313 | [0.0, 25.14, 1.67, 17.36, 3.56, 17.62, 7.71, 3... |
| prom_part_500 | float64 | 0 | 2571 | [0.0, 0.57, 4.44, 19.42, 15.12, 39.33, 13.12, ... |
| office_count_500 | int64 | 0 | 30 | [0, 15, 5, 3, 1, 2, 4, 8, 6, 10, 7, 25, 9, 12,... |
| office_sqm_500 | int64 | 0 | 1070 | [0, 293699, 227705, 7719, 15565, 34565, 122400... |
| trc_count_500 | int64 | 0 | 9 | [0, 1, 3, 2, 5, 4, 6, 7, 8] |
| trc_sqm_500 | int64 | 0 | 500 | [0, 45000, 102000, 8499, 3164, 420403, 7208, 3... |
| cafe_count_500 | int64 | 0 | 95 | [0, 5, 3, 2, 48, 7, 4, 1, 13, 6, 16, 20, 23, 5... |
| cafe_sum_500_min_price_avg | float64 | 13281 | 655 | [nan, 860.0, 666.67, 1000.0, 702.22, 750.0, 63... |
| cafe_sum_500_max_price_avg | float64 | 13281 | 446 | [nan, 1500.0, 1166.67, 1625.0, 1250.0, 1083.33... |
| cafe_avg_price_500 | float64 | 13281 | 860 | [nan, 1180.0, 916.67, 1250.0, 934.44, 1312.5, ... |
| cafe_count_500_na_price | int64 | 0 | 13 | [0, 3, 1, 2, 4, 7, 9, 5, 6, 10, 13, 8, 11] |
| cafe_count_500_price_500 | int64 | 0 | 33 | [0, 1, 17, 2, 3, 4, 10, 5, 8, 19, 7, 16, 9, 11... |
| cafe_count_500_price_1000 | int64 | 0 | 32 | [0, 3, 2, 10, 1, 7, 5, 6, 9, 13, 4, 8, 11, 14,... |
| cafe_count_500_price_1500 | int64 | 0 | 29 | [0, 1, 2, 11, 3, 8, 4, 16, 6, 13, 5, 10, 9, 7,... |
| cafe_count_500_price_2500 | int64 | 0 | 21 | [0, 7, 1, 2, 3, 6, 4, 14, 5, 13, 9, 11, 19, 8,... |
| cafe_count_500_price_4000 | int64 | 0 | 14 | [0, 1, 2, 4, 3, 5, 11, 8, 7, 6, 9, 10, 13, 14] |
| cafe_count_500_price_high | int64 | 0 | 4 | [0, 1, 2, 3] |
| big_church_count_500 | int64 | 0 | 11 | [0, 1, 2, 8, 3, 4, 5, 6, 10, 11, 7] |
| church_count_500 | int64 | 0 | 15 | [0, 1, 4, 2, 3, 15, 6, 5, 8, 9, 7, 10, 12, 17,... |
| mosque_count_500 | int64 | 0 | 2 | [0, 1] |
| leisure_count_500 | int64 | 0 | 10 | [0, 2, 1, 4, 3, 6, 5, 7, 9, 8] |
| sport_count_500 | int64 | 0 | 12 | [1, 0, 3, 2, 5, 4, 6, 8, 9, 7, 10, 11] |
| market_count_500 | int64 | 0 | 5 | [0, 1, 2, 3, 4] |
| green_part_1000 | float64 | 0 | 3735 | [7.36, 26.66, 4.99, 19.25, 3.34, 0.0, 14.59, 2... |
| prom_part_1000 | float64 | 0 | 3175 | [0.0, 0.07, 0.29, 10.35, 8.29, 40.27, 4.16, 6.... |
| office_count_1000 | int64 | 0 | 84 | [1, 2, 0, 46, 10, 8, 19, 6, 3, 16, 4, 17, 11, ... |
| office_sqm_1000 | int64 | 0 | 1942 | [30500, 86600, 0, 11000, 420952, 275135, 15191... |
| trc_count_1000 | int64 | 0 | 19 | [3, 5, 0, 6, 1, 2, 7, 4, 8, 12, 9, 20, 13, 10,... |
| trc_sqm_1000 | int64 | 0 | 1063 | [55600, 94065, 0, 80780, 158200, 164000, 19400... |
| cafe_count_1000 | int64 | 0 | 235 | [19, 13, 9, 12, 153, 16, 10, 5, 2, 1, 4, 55, 3... |
| cafe_sum_1000_min_price_avg | float64 | 6524 | 1627 | [527.78, 615.38, 642.86, 658.33, 763.45, 883.3... |
| cafe_sum_1000_max_price_avg | float64 | 6524 | 1054 | [888.89, 1076.92, 1142.86, 1083.33, 1272.41, 1... |
| cafe_avg_price_1000 | float64 | 6524 | 2134 | [708.33, 846.15, 892.86, 870.83, 1017.93, 1150... |
| cafe_count_1000_na_price | int64 | 0 | 28 | [1, 0, 2, 8, 3, 7, 5, 4, 12, 20, 13, 14, 6, 15... |
| cafe_count_1000_price_500 | int64 | 0 | 82 | [10, 5, 0, 3, 39, 1, 4, 2, 15, 9, 7, 6, 16, 11... |
| cafe_count_1000_price_1000 | int64 | 0 | 83 | [4, 6, 5, 45, 1, 7, 2, 0, 15, 3, 8, 11, 10, 14... |
| cafe_count_1000_price_1500 | int64 | 0 | 84 | [3, 1, 2, 5, 39, 0, 11, 7, 4, 6, 23, 14, 29, 1... |
| cafe_count_1000_price_2500 | int64 | 0 | 57 | [1, 0, 19, 2, 11, 4, 5, 6, 3, 10, 17, 7, 15, 5... |
| cafe_count_1000_price_4000 | int64 | 0 | 29 | [0, 1, 2, 5, 13, 3, 6, 27, 4, 12, 10, 7, 8, 9,... |
| cafe_count_1000_price_high | int64 | 0 | 8 | [0, 1, 2, 5, 3, 6, 4, 7] |
| big_church_count_1000 | int64 | 0 | 24 | [1, 0, 7, 3, 2, 4, 6, 5, 16, 9, 8, 15, 13, 10,... |
| church_count_1000 | int64 | 0 | 36 | [2, 1, 0, 12, 4, 3, 5, 9, 6, 10, 13, 35, 11, 1... |
| mosque_count_1000 | int64 | 0 | 2 | [0, 1] |
| leisure_count_1000 | int64 | 0 | 26 | [0, 4, 6, 2, 1, 11, 5, 8, 7, 3, 9, 30, 21, 18,... |
| sport_count_1000 | int64 | 0 | 23 | [6, 2, 5, 3, 7, 1, 0, 8, 4, 10, 14, 9, 11, 13,... |
| market_count_1000 | int64 | 0 | 7 | [1, 0, 3, 2, 4, 5, 6] |
| green_part_1500 | float64 | 0 | 3934 | [14.27, 21.53, 9.92, 28.38, 4.12, 0.0, 20.5, 1... |
| prom_part_1500 | float64 | 0 | 3332 | [6.92, 7.71, 6.73, 6.57, 4.83, 50.64, 5.57, 1.... |
| office_count_1500 | int64 | 0 | 154 | [3, 0, 2, 93, 18, 20, 5, 1, 38, 12, 4, 44, 6, ... |
| office_sqm_1500 | int64 | 0 | 2639 | [39554, 102910, 0, 11000, 1195735, 431090, 453... |
| trc_count_1500 | int64 | 0 | 27 | [9, 7, 1, 6, 11, 0, 5, 4, 2, 12, 13, 3, 8, 10,... |
| trc_sqm_1500 | int64 | 0 | 1785 | [171420, 127065, 2600, 89492, 445900, 186400, ... |
| cafe_count_1500 | int64 | 0 | 374 | [34, 17, 14, 23, 272, 44, 29, 15, 9, 2, 4, 13,... |
| cafe_sum_1500_min_price_avg | float64 | 4199 | 2582 | [566.67, 694.12, 516.67, 673.91, 766.8, 718.18... |
| cafe_sum_1500_max_price_avg | float64 | 4199 | 1723 | [969.7, 1205.88, 916.67, 1130.43, 1272.73, 118... |
| cafe_avg_price_1500 | float64 | 4199 | 3330 | [768.18, 950.0, 716.67, 902.17, 1019.76, 915.8... |
| cafe_count_1500_na_price | int64 | 0 | 48 | [1, 0, 2, 19, 3, 13, 4, 5, 16, 23, 6, 11, 9, 7... |
| cafe_count_1500_price_500 | int64 | 0 | 147 | [14, 6, 4, 5, 70, 3, 12, 13, 2, 0, 1, 19, 10, ... |
| cafe_count_1500_price_1000 | int64 | 0 | 145 | [11, 7, 6, 9, 74, 3, 17, 4, 1, 0, 25, 8, 5, 2,... |
| cafe_count_1500_price_1500 | int64 | 0 | 135 | [6, 1, 2, 8, 72, 4, 7, 0, 5, 32, 3, 9, 18, 12,... |
| cafe_count_1500_price_2500 | int64 | 0 | 97 | [2, 0, 1, 30, 4, 3, 29, 6, 5, 21, 8, 10, 7, 24... |
| cafe_count_1500_price_4000 | int64 | 0 | 53 | [0, 1, 6, 9, 3, 4, 32, 2, 7, 10, 45, 31, 35, 5... |
| cafe_count_1500_price_high | int64 | 0 | 13 | [0, 1, 5, 2, 4, 3, 12, 6, 9, 11, 7, 8, 10] |
| big_church_count_1500 | int64 | 0 | 42 | [1, 0, 18, 4, 2, 7, 8, 3, 5, 13, 6, 11, 9, 15,... |
| church_count_1500 | int64 | 0 | 62 | [2, 5, 4, 0, 30, 11, 3, 1, 6, 9, 8, 14, 7, 10,... |
| mosque_count_1500 | int64 | 0 | 2 | [0, 1] |
| leisure_count_1500 | int64 | 0 | 41 | [0, 4, 10, 3, 1, 2, 5, 9, 12, 8, 20, 7, 13, 18... |
| sport_count_1500 | int64 | 0 | 34 | [7, 9, 6, 14, 11, 20, 10, 1, 3, 12, 8, 4, 0, 5... |
| market_count_1500 | int64 | 0 | 8 | [1, 0, 5, 2, 3, 4, 6, 7] |
| green_part_2000 | float64 | 0 | 3981 | [11.77, 22.37, 12.99, 32.29, 4.53, 0.38, 23.45... |
| prom_part_2000 | float64 | 0 | 3348 | [15.97, 19.25, 12.75, 5.73, 5.02, 51.58, 5.25,... |
| office_count_2000 | int64 | 0 | 226 | [9, 4, 2, 149, 21, 42, 6, 12, 0, 1, 5, 58, 25,... |
| office_sqm_2000 | int64 | 0 | 3288 | [188854, 165510, 100200, 11000, 1625130, 47129... |
| trc_count_2000 | int64 | 0 | 37 | [19, 8, 7, 17, 14, 12, 0, 4, 6, 15, 9, 16, 5, ... |
| trc_sqm_2000 | int64 | 0 | 2397 | [1244891, 179065, 52550, 89492, 564843, 683945... |
| cafe_count_2000 | int64 | 0 | 529 | [36, 21, 24, 25, 483, 33, 71, 18, 22, 2, 11, 2... |
| cafe_sum_2000_min_price_avg | float64 | 1725 | 3537 | [614.29, 695.24, 563.64, 660.0, 765.93, 741.38... |
| cafe_sum_2000_max_price_avg | float64 | 1725 | 2429 | [1042.86, 1190.48, 977.27, 1120.0, 1269.23, 12... |
| cafe_avg_price_2000 | float64 | 1725 | 4505 | [828.57, 942.86, 770.45, 890.0, 1017.58, 1000.... |
| cafe_count_2000_na_price | int64 | 0 | 68 | [1, 0, 2, 28, 4, 6, 14, 3, 7, 8, 11, 19, 9, 5,... |
| cafe_count_2000_price_500 | int64 | 0 | 212 | [15, 7, 8, 5, 130, 16, 2, 0, 1, 4, 36, 14, 19,... |
| cafe_count_2000_price_1000 | int64 | 0 | 210 | [11, 8, 9, 129, 13, 24, 7, 10, 1, 4, 41, 19, 1... |
| cafe_count_2000_price_1500 | int64 | 0 | 215 | [6, 3, 4, 8, 131, 17, 1, 50, 14, 9, 0, 7, 2, 1... |
| cafe_count_2000_price_2500 | int64 | 0 | 145 | [2, 1, 50, 7, 4, 0, 39, 3, 5, 18, 8, 22, 11, 1... |
| cafe_count_2000_price_4000 | int64 | 0 | 73 | [1, 0, 14, 18, 2, 6, 5, 3, 4, 12, 40, 9, 16, 2... |
| cafe_count_2000_price_high | int64 | 0 | 17 | [0, 1, 6, 2, 3, 4, 5, 10, 7, 8, 15, 11, 12, 16... |
| big_church_count_2000 | int64 | 0 | 63 | [1, 0, 35, 6, 9, 2, 8, 3, 5, 4, 23, 17, 11, 10... |
| church_count_2000 | int64 | 0 | 95 | [2, 5, 4, 1, 61, 14, 3, 6, 7, 8, 0, 10, 11, 32... |
| mosque_count_2000 | int64 | 0 | 2 | [0, 1] |
| leisure_count_2000 | int64 | 0 | 52 | [0, 4, 17, 1, 3, 6, 2, 5, 9, 14, 8, 11, 7, 47,... |
| sport_count_2000 | int64 | 0 | 53 | [10, 11, 8, 13, 21, 28, 9, 14, 1, 7, 23, 17, 1... |
| market_count_2000 | int64 | 0 | 9 | [1, 0, 5, 2, 3, 4, 7, 6, 8] |
| green_part_3000 | float64 | 0 | 3924 | [11.98, 18.07, 12.14, 20.79, 5.06, 1.82, 22.2,... |
| prom_part_3000 | float64 | 0 | 3061 | [13.55, 27.32, 26.46, 3.57, 8.62, 39.99, 7.57,... |
| office_count_3000 | int64 | 0 | 395 | [12, 8, 4, 305, 54, 72, 33, 10, 0, 2, 120, 61,... |
| office_sqm_3000 | int64 | 0 | 4281 | [251554, 821986, 110856, 167000, 3420907, 1181... |
| trc_count_3000 | int64 | 0 | 65 | [23, 14, 7, 12, 60, 29, 24, 21, 2, 11, 15, 22,... |
| trc_sqm_3000 | int64 | 0 | 3419 | [1419204, 491565, 52550, 205756, 2296870, 1059... |
| cafe_count_3000 | int64 | 0 | 877 | [68, 30, 41, 32, 1068, 120, 160, 55, 98, 56, 6... |
| cafe_sum_3000_min_price_avg | float64 | 991 | 5163 | [639.68, 631.03, 697.44, 718.75, 853.03, 737.9... |
| cafe_sum_3000_max_price_avg | float64 | 991 | 3885 | [1079.37, 1086.21, 1192.31, 1218.75, 1410.45, ... |
| cafe_avg_price_3000 | float64 | 991 | 6087 | [859.52, 858.62, 944.87, 968.75, 1131.74, 984.... |
| cafe_count_3000_na_price | int64 | 0 | 112 | [5, 1, 2, 0, 63, 12, 7, 4, 11, 3, 6, 31, 10, 2... |
| cafe_count_3000_price_500 | int64 | 0 | 360 | [21, 11, 9, 5, 266, 24, 41, 36, 0, 7, 91, 29, ... |
| cafe_count_3000_price_1000 | int64 | 0 | 358 | [22, 11, 17, 14, 267, 37, 57, 15, 28, 2, 12, 9... |
| cafe_count_3000_price_1500 | int64 | 0 | 343 | [16, 4, 9, 10, 262, 35, 37, 11, 21, 12, 109, 3... |
| cafe_count_3000_price_2500 | int64 | 0 | 244 | [3, 2, 149, 11, 16, 1, 9, 0, 81, 10, 4, 8, 6, ... |
| cafe_count_3000_price_4000 | int64 | 0 | 112 | [1, 0, 57, 2, 8, 44, 3, 9, 13, 5, 4, 16, 41, 3... |
| cafe_count_3000_price_high | int64 | 0 | 24 | [0, 4, 11, 1, 2, 9, 14, 6, 10, 8, 3, 15, 16, 1... |
| big_church_count_3000 | int64 | 0 | 101 | [2, 1, 0, 70, 12, 17, 3, 4, 5, 15, 18, 6, 7, 3... |
| church_count_3000 | int64 | 0 | 160 | [4, 7, 11, 2, 121, 12, 29, 8, 9, 3, 5, 19, 22,... |
| mosque_count_3000 | int64 | 0 | 3 | [0, 1, 2] |
| leisure_count_3000 | int64 | 0 | 80 | [0, 6, 40, 2, 8, 1, 10, 4, 3, 5, 15, 16, 11, 3... |
| sport_count_3000 | int64 | 0 | 101 | [21, 19, 20, 18, 77, 31, 56, 24, 32, 4, 17, 23... |
| market_count_3000 | int64 | 0 | 11 | [1, 6, 3, 5, 7, 2, 0, 4, 8, 10, 9] |
| green_part_5000 | float64 | 0 | 3514 | [13.09, 10.26, 13.69, 14.18, 8.38, 5.92, 25.23... |
| prom_part_5000 | float64 | 178 | 2399 | [13.31, 27.47, 21.58, 3.89, 10.92, 25.79, 12.7... |
| office_count_5000 | int64 | 0 | 725 | [29, 66, 43, 8, 689, 253, 228, 24, 94, 30, 2, ... |
| office_sqm_5000 | int64 | 0 | 5970 | [807385, 2690465, 1478160, 244166, 8404624, 42... |
| trc_count_5000 | int64 | 0 | 121 | [52, 40, 35, 22, 114, 63, 49, 45, 41, 32, 20, ... |
| trc_sqm_5000 | int64 | 0 | 5204 | [4036616, 2034942, 1572990, 942180, 3503058, 2... |
| cafe_count_5000 | int64 | 0 | 1580 | [152, 177, 122, 61, 2283, 567, 635, 143, 292, ... |
| cafe_sum_5000_min_price_avg | float64 | 297 | 7249 | [708.57, 673.81, 702.68, 931.58, 853.88, 769.9... |
| cafe_sum_5000_max_price_avg | float64 | 297 | 6305 | [1185.71, 1148.81, 1196.43, 1552.63, 1411.45, ... |
| cafe_avg_price_5000 | float64 | 297 | 7857 | [947.14, 911.31, 949.55, 1242.11, 1132.66, 102... |
| cafe_count_5000_na_price | int64 | 0 | 175 | [12, 9, 10, 4, 143, 35, 34, 11, 28, 14, 1, 111... |
| cafe_count_5000_price_500 | int64 | 0 | 610 | [39, 49, 29, 7, 566, 137, 163, 37, 86, 28, 8, ... |
| cafe_count_5000_price_1000 | int64 | 0 | 603 | [48, 65, 45, 21, 578, 163, 194, 46, 81, 39, 16... |
| cafe_count_5000_price_1500 | int64 | 0 | 599 | [40, 36, 25, 15, 552, 155, 144, 69, 30, 11, 39... |
| cafe_count_5000_price_2500 | int64 | 0 | 375 | [9, 15, 10, 11, 319, 62, 81, 19, 1, 8, 13, 254... |
| cafe_count_5000_price_4000 | int64 | 0 | 148 | [4, 3, 2, 108, 14, 16, 8, 13, 1, 9, 19, 6, 0, ... |
| cafe_count_5000_price_high | int64 | 0 | 31 | [0, 1, 17, 3, 22, 2, 6, 4, 10, 11, 5, 25, 20, ... |
| big_church_count_5000 | int64 | 0 | 152 | [13, 15, 11, 4, 135, 53, 38, 18, 10, 5, 7, 57,... |
| church_count_5000 | int64 | 0 | 251 | [22, 29, 27, 4, 236, 78, 80, 18, 34, 20, 9, 15... |
| mosque_count_5000 | int64 | 0 | 3 | [1, 0, 2] |
| leisure_count_5000 | int64 | 0 | 107 | [0, 10, 4, 91, 20, 27, 3, 2, 72, 28, 1, 9, 5, ... |
| sport_count_5000 | int64 | 0 | 216 | [52, 66, 67, 26, 195, 113, 127, 47, 85, 17, 35... |
| market_count_5000 | int64 | 0 | 22 | [4, 14, 10, 3, 17, 8, 11, 1, 6, 0, 7, 9, 13, 1... |
| price_doc | int64 | 0 | 9296 | [5850000, 6000000, 5700000, 13100000, 16331452... |
features_with_na=[features for features in train_data.columns if train_data[features].isnull().sum()>1]
for feature in features_with_na:
print(feature, np.round(train_data[feature].isnull().mean(), 4), ' % missing values')
life_sq 0.2095 % missing values floor 0.0055 % missing values max_floor 0.3141 % missing values material 0.3141 % missing values build_year 0.4465 % missing values num_room 0.3141 % missing values kitch_sq 0.3141 % missing values state 0.445 % missing values preschool_quota 0.2195 % missing values school_quota 0.2194 % missing values hospital_beds_raion 0.4739 % missing values raion_build_count_with_material_info 0.1638 % missing values build_count_block 0.1638 % missing values build_count_wood 0.1638 % missing values build_count_frame 0.1638 % missing values build_count_brick 0.1638 % missing values build_count_monolith 0.1638 % missing values build_count_panel 0.1638 % missing values build_count_foam 0.1638 % missing values build_count_slag 0.1638 % missing values build_count_mix 0.1638 % missing values raion_build_count_with_builddate_info 0.1638 % missing values build_count_before_1920 0.1638 % missing values build_count_1921-1945 0.1638 % missing values build_count_1946-1970 0.1638 % missing values build_count_1971-1995 0.1638 % missing values build_count_after_1995 0.1638 % missing values metro_min_walk 0.0008 % missing values metro_km_walk 0.0008 % missing values railroad_station_walk_km 0.0008 % missing values railroad_station_walk_min 0.0008 % missing values ID_railroad_station_walk 0.0008 % missing values cafe_sum_500_min_price_avg 0.4359 % missing values cafe_sum_500_max_price_avg 0.4359 % missing values cafe_avg_price_500 0.4359 % missing values cafe_sum_1000_min_price_avg 0.2141 % missing values cafe_sum_1000_max_price_avg 0.2141 % missing values cafe_avg_price_1000 0.2141 % missing values cafe_sum_1500_min_price_avg 0.1378 % missing values cafe_sum_1500_max_price_avg 0.1378 % missing values cafe_avg_price_1500 0.1378 % missing values cafe_sum_2000_min_price_avg 0.0566 % missing values cafe_sum_2000_max_price_avg 0.0566 % missing values cafe_avg_price_2000 0.0566 % missing values cafe_sum_3000_min_price_avg 0.0325 % missing values cafe_sum_3000_max_price_avg 0.0325 % missing values cafe_avg_price_3000 0.0325 % missing values prom_part_5000 0.0058 % missing values cafe_sum_5000_min_price_avg 0.0097 % missing values cafe_sum_5000_max_price_avg 0.0097 % missing values cafe_avg_price_5000 0.0097 % missing values
for feature in features_with_na:
data = train_data.copy()
data[feature] = np.where(data[feature].isnull(), 1, 0)
data.groupby(feature)['price_doc'].median().plot.bar()
plt.title(feature)
plt.show()
numerical_features = [feature for feature in train_data.columns if train_data[feature].dtypes != 'O']
print('Number of numerical variables: ', len(numerical_features))
train_data[numerical_features].head()
Number of numerical variables: 276
| id | full_sq | life_sq | floor | max_floor | material | build_year | num_room | kitch_sq | state | area_m | raion_popul | green_zone_part | indust_part | children_preschool | preschool_quota | preschool_education_centers_raion | children_school | school_quota | school_education_centers_raion | school_education_centers_top_20_raion | hospital_beds_raion | healthcare_centers_raion | university_top_20_raion | sport_objects_raion | additional_education_raion | culture_objects_top_25_raion | shopping_centers_raion | office_raion | full_all | male_f | female_f | young_all | young_male | young_female | work_all | work_male | work_female | ekder_all | ekder_male | ekder_female | 0_6_all | 0_6_male | 0_6_female | 7_14_all | 7_14_male | 7_14_female | 0_17_all | 0_17_male | 0_17_female | 16_29_all | 16_29_male | 16_29_female | 0_13_all | 0_13_male | 0_13_female | raion_build_count_with_material_info | build_count_block | build_count_wood | build_count_frame | build_count_brick | build_count_monolith | build_count_panel | build_count_foam | build_count_slag | build_count_mix | raion_build_count_with_builddate_info | build_count_before_1920 | build_count_1921-1945 | build_count_1946-1970 | build_count_1971-1995 | build_count_after_1995 | ID_metro | metro_min_avto | metro_km_avto | metro_min_walk | metro_km_walk | kindergarten_km | school_km | park_km | green_zone_km | industrial_km | water_treatment_km | cemetery_km | incineration_km | railroad_station_walk_km | railroad_station_walk_min | ID_railroad_station_walk | railroad_station_avto_km | railroad_station_avto_min | ID_railroad_station_avto | public_transport_station_km | public_transport_station_min_walk | water_km | mkad_km | ttk_km | sadovoe_km | bulvar_ring_km | kremlin_km | big_road1_km | ID_big_road1 | big_road2_km | ID_big_road2 | railroad_km | zd_vokzaly_avto_km | ID_railroad_terminal | bus_terminal_avto_km | ID_bus_terminal | oil_chemistry_km | nuclear_reactor_km | radiation_km | power_transmission_line_km | thermal_power_plant_km | ts_km | big_market_km | market_shop_km | fitness_km | swim_pool_km | ice_rink_km | stadium_km | basketball_km | hospice_morgue_km | detention_facility_km | public_healthcare_km | university_km | workplaces_km | shopping_centers_km | office_km | additional_education_km | preschool_km | big_church_km | church_synagogue_km | mosque_km | theater_km | museum_km | exhibition_km | catering_km | green_part_500 | prom_part_500 | office_count_500 | office_sqm_500 | trc_count_500 | trc_sqm_500 | cafe_count_500 | cafe_sum_500_min_price_avg | cafe_sum_500_max_price_avg | cafe_avg_price_500 | cafe_count_500_na_price | cafe_count_500_price_500 | cafe_count_500_price_1000 | cafe_count_500_price_1500 | cafe_count_500_price_2500 | cafe_count_500_price_4000 | cafe_count_500_price_high | big_church_count_500 | church_count_500 | mosque_count_500 | leisure_count_500 | sport_count_500 | market_count_500 | green_part_1000 | prom_part_1000 | office_count_1000 | office_sqm_1000 | trc_count_1000 | trc_sqm_1000 | cafe_count_1000 | cafe_sum_1000_min_price_avg | cafe_sum_1000_max_price_avg | cafe_avg_price_1000 | cafe_count_1000_na_price | cafe_count_1000_price_500 | cafe_count_1000_price_1000 | cafe_count_1000_price_1500 | cafe_count_1000_price_2500 | cafe_count_1000_price_4000 | cafe_count_1000_price_high | big_church_count_1000 | church_count_1000 | mosque_count_1000 | leisure_count_1000 | sport_count_1000 | market_count_1000 | green_part_1500 | prom_part_1500 | office_count_1500 | office_sqm_1500 | trc_count_1500 | trc_sqm_1500 | cafe_count_1500 | cafe_sum_1500_min_price_avg | cafe_sum_1500_max_price_avg | cafe_avg_price_1500 | cafe_count_1500_na_price | cafe_count_1500_price_500 | cafe_count_1500_price_1000 | cafe_count_1500_price_1500 | cafe_count_1500_price_2500 | cafe_count_1500_price_4000 | cafe_count_1500_price_high | big_church_count_1500 | church_count_1500 | mosque_count_1500 | leisure_count_1500 | sport_count_1500 | market_count_1500 | green_part_2000 | prom_part_2000 | office_count_2000 | office_sqm_2000 | trc_count_2000 | trc_sqm_2000 | cafe_count_2000 | cafe_sum_2000_min_price_avg | cafe_sum_2000_max_price_avg | cafe_avg_price_2000 | cafe_count_2000_na_price | cafe_count_2000_price_500 | cafe_count_2000_price_1000 | cafe_count_2000_price_1500 | cafe_count_2000_price_2500 | cafe_count_2000_price_4000 | cafe_count_2000_price_high | big_church_count_2000 | church_count_2000 | mosque_count_2000 | leisure_count_2000 | sport_count_2000 | market_count_2000 | green_part_3000 | prom_part_3000 | office_count_3000 | office_sqm_3000 | trc_count_3000 | trc_sqm_3000 | cafe_count_3000 | cafe_sum_3000_min_price_avg | cafe_sum_3000_max_price_avg | cafe_avg_price_3000 | cafe_count_3000_na_price | cafe_count_3000_price_500 | cafe_count_3000_price_1000 | cafe_count_3000_price_1500 | cafe_count_3000_price_2500 | cafe_count_3000_price_4000 | cafe_count_3000_price_high | big_church_count_3000 | church_count_3000 | mosque_count_3000 | leisure_count_3000 | sport_count_3000 | market_count_3000 | green_part_5000 | prom_part_5000 | office_count_5000 | office_sqm_5000 | trc_count_5000 | trc_sqm_5000 | cafe_count_5000 | cafe_sum_5000_min_price_avg | cafe_sum_5000_max_price_avg | cafe_avg_price_5000 | cafe_count_5000_na_price | cafe_count_5000_price_500 | cafe_count_5000_price_1000 | cafe_count_5000_price_1500 | cafe_count_5000_price_2500 | cafe_count_5000_price_4000 | cafe_count_5000_price_high | big_church_count_5000 | church_count_5000 | mosque_count_5000 | leisure_count_5000 | sport_count_5000 | market_count_5000 | price_doc | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 43 | 27.0 | 4.0 | NaN | NaN | NaN | NaN | NaN | NaN | 6.407578e+06 | 155572 | 0.189727 | 0.000070 | 9576 | 5001.0 | 5 | 10309 | 11065.0 | 5 | 0 | 240.0 | 1 | 0 | 7 | 3 | 0 | 16 | 1 | 86206 | 40477 | 45729 | 21154 | 11007 | 10147 | 98207 | 52277 | 45930 | 36211 | 10580 | 25631 | 9576 | 4899 | 4677 | 10309 | 5463 | 4846 | 23603 | 12286 | 11317 | 17508 | 9425 | 8083 | 18654 | 9709 | 8945 | 211.0 | 25.0 | 0.0 | 0.0 | 0.0 | 2.0 | 184.0 | 0.0 | 0.0 | 0.0 | 211.0 | 0.0 | 0.0 | 0.0 | 206.0 | 5.0 | 1 | 2.590241 | 1.131260 | 13.575119 | 1.131260 | 0.145700 | 0.177975 | 2.158587 | 0.600973 | 1.080934 | 23.683460 | 1.804127 | 3.633334 | 5.419893 | 65.038716 | 1.0 | 5.419893 | 6.905893 | 1 | 0.274985 | 3.299822 | 0.992631 | 1.422391 | 10.918587 | 13.100618 | 13.675657 | 15.156211 | 1.422391 | 1 | 3.830951 | 5 | 1.305159 | 14.231961 | 101 | 24.292406 | 1 | 18.152338 | 5.718519 | 1.210027 | 1.062513 | 5.814135 | 4.308127 | 10.814172 | 1.676258 | 0.485841 | 3.065047 | 1.107594 | 8.148591 | 3.516513 | 2.392353 | 4.248036 | 0.974743 | 6.715026 | 0.884350 | 0.648488 | 0.637189 | 0.947962 | 0.177975 | 0.625783 | 0.628187 | 3.932040 | 14.053047 | 7.389498 | 7.023705 | 0.516838 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 7.36 | 0.00 | 1 | 30500 | 3 | 55600 | 19 | 527.78 | 888.89 | 708.33 | 1 | 10 | 4 | 3 | 1 | 0 | 0 | 1 | 2 | 0 | 0 | 6 | 1 | 14.27 | 6.92 | 3 | 39554 | 9 | 171420 | 34 | 566.67 | 969.70 | 768.18 | 1 | 14 | 11 | 6 | 2 | 0 | 0 | 1 | 2 | 0 | 0 | 7 | 1 | 11.77 | 15.97 | 9 | 188854 | 19 | 1244891 | 36 | 614.29 | 1042.86 | 828.57 | 1 | 15 | 11 | 6 | 2 | 1 | 0 | 1 | 2 | 0 | 0 | 10 | 1 | 11.98 | 13.55 | 12 | 251554 | 23 | 1419204 | 68 | 639.68 | 1079.37 | 859.52 | 5 | 21 | 22 | 16 | 3 | 1 | 0 | 2 | 4 | 0 | 0 | 21 | 1 | 13.09 | 13.31 | 29 | 807385 | 52 | 4036616 | 152 | 708.57 | 1185.71 | 947.14 | 12 | 39 | 48 | 40 | 9 | 4 | 0 | 13 | 22 | 1 | 0 | 52 | 4 | 5850000 |
| 1 | 2 | 34 | 19.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | 9.589337e+06 | 115352 | 0.372602 | 0.049637 | 6880 | 3119.0 | 5 | 7759 | 6237.0 | 8 | 0 | 229.0 | 1 | 0 | 6 | 1 | 1 | 3 | 0 | 76284 | 34200 | 42084 | 15727 | 7925 | 7802 | 70194 | 35622 | 34572 | 29431 | 9266 | 20165 | 6880 | 3466 | 3414 | 7759 | 3909 | 3850 | 17700 | 8998 | 8702 | 15164 | 7571 | 7593 | 13729 | 6929 | 6800 | 245.0 | 83.0 | 1.0 | 0.0 | 67.0 | 4.0 | 90.0 | 0.0 | 0.0 | 0.0 | 244.0 | 1.0 | 1.0 | 143.0 | 84.0 | 15.0 | 2 | 0.936700 | 0.647337 | 7.620630 | 0.635053 | 0.147754 | 0.273345 | 0.550690 | 0.065321 | 0.966479 | 1.317476 | 4.655004 | 8.648587 | 3.411993 | 40.943917 | 2.0 | 3.641773 | 4.679745 | 2 | 0.065263 | 0.783160 | 0.698081 | 9.503405 | 3.103996 | 6.444333 | 8.132640 | 8.698054 | 2.887377 | 2 | 3.103996 | 4 | 0.694536 | 9.242586 | 32 | 5.706113 | 2 | 9.034642 | 3.489954 | 2.724295 | 1.246149 | 3.419574 | 0.725560 | 6.910568 | 3.424716 | 0.668364 | 2.000154 | 8.972823 | 6.127073 | 1.161579 | 2.543747 | 12.649879 | 1.477723 | 1.852560 | 0.686252 | 0.519311 | 0.688796 | 1.072315 | 0.273345 | 0.967821 | 0.471447 | 4.841544 | 6.829889 | 0.709260 | 2.358840 | 0.230287 | 25.14 | 0.00 | 0 | 0 | 0 | 0 | 5 | 860.00 | 1500.00 | 1180.00 | 0 | 1 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 26.66 | 0.07 | 2 | 86600 | 5 | 94065 | 13 | 615.38 | 1076.92 | 846.15 | 0 | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 0 | 4 | 2 | 0 | 21.53 | 7.71 | 3 | 102910 | 7 | 127065 | 17 | 694.12 | 1205.88 | 950.00 | 0 | 6 | 7 | 1 | 2 | 1 | 0 | 1 | 5 | 0 | 4 | 9 | 0 | 22.37 | 19.25 | 4 | 165510 | 8 | 179065 | 21 | 695.24 | 1190.48 | 942.86 | 0 | 7 | 8 | 3 | 2 | 1 | 0 | 1 | 5 | 0 | 4 | 11 | 0 | 18.07 | 27.32 | 12 | 821986 | 14 | 491565 | 30 | 631.03 | 1086.21 | 858.62 | 1 | 11 | 11 | 4 | 2 | 1 | 0 | 1 | 7 | 0 | 6 | 19 | 1 | 10.26 | 27.47 | 66 | 2690465 | 40 | 2034942 | 177 | 673.81 | 1148.81 | 911.31 | 9 | 49 | 65 | 36 | 15 | 3 | 0 | 15 | 29 | 1 | 10 | 66 | 14 | 6000000 |
| 2 | 3 | 43 | 29.0 | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | 4.808270e+06 | 101708 | 0.112560 | 0.118537 | 5879 | 1463.0 | 4 | 6207 | 5580.0 | 7 | 0 | 1183.0 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 101982 | 46076 | 55906 | 13028 | 6835 | 6193 | 63388 | 31813 | 31575 | 25292 | 7609 | 17683 | 5879 | 3095 | 2784 | 6207 | 3269 | 2938 | 14884 | 7821 | 7063 | 19401 | 9045 | 10356 | 11252 | 5916 | 5336 | 330.0 | 59.0 | 0.0 | 0.0 | 206.0 | 4.0 | 60.0 | 0.0 | 1.0 | 0.0 | 330.0 | 1.0 | 0.0 | 246.0 | 63.0 | 20.0 | 3 | 2.120999 | 1.637996 | 17.351515 | 1.445960 | 0.049102 | 0.158072 | 0.374848 | 0.453172 | 0.939275 | 4.912660 | 3.381083 | 11.996480 | 1.277658 | 15.331896 | 3.0 | 1.277658 | 1.701420 | 3 | 0.328756 | 3.945073 | 0.468265 | 5.604800 | 2.927487 | 6.963403 | 8.054252 | 9.067885 | 0.647250 | 3 | 2.927487 | 4 | 0.700691 | 9.540544 | 5 | 6.710302 | 3 | 5.777394 | 7.506612 | 0.772216 | 1.602183 | 3.682455 | 3.562188 | 5.752368 | 1.375443 | 0.733101 | 1.239304 | 1.978517 | 0.767569 | 1.952771 | 0.621357 | 7.682303 | 0.097144 | 0.841254 | 1.510089 | 1.486533 | 1.543049 | 0.391957 | 0.158072 | 3.178751 | 0.755946 | 7.922152 | 4.273200 | 3.156423 | 4.958214 | 0.190462 | 1.67 | 0.00 | 0 | 0 | 0 | 0 | 3 | 666.67 | 1166.67 | 916.67 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4.99 | 0.29 | 0 | 0 | 0 | 0 | 9 | 642.86 | 1142.86 | 892.86 | 2 | 0 | 5 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 5 | 3 | 9.92 | 6.73 | 0 | 0 | 1 | 2600 | 14 | 516.67 | 916.67 | 716.67 | 2 | 4 | 6 | 2 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 6 | 5 | 12.99 | 12.75 | 4 | 100200 | 7 | 52550 | 24 | 563.64 | 977.27 | 770.45 | 2 | 8 | 9 | 4 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 8 | 5 | 12.14 | 26.46 | 8 | 110856 | 7 | 52550 | 41 | 697.44 | 1192.31 | 944.87 | 2 | 9 | 17 | 9 | 3 | 1 | 0 | 0 | 11 | 0 | 0 | 20 | 6 | 13.69 | 21.58 | 43 | 1478160 | 35 | 1572990 | 122 | 702.68 | 1196.43 | 949.55 | 10 | 29 | 45 | 25 | 10 | 3 | 0 | 11 | 27 | 0 | 4 | 67 | 10 | 5700000 |
| 3 | 4 | 89 | 50.0 | 9.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.258354e+07 | 178473 | 0.194703 | 0.069753 | 13087 | 6839.0 | 9 | 13670 | 17063.0 | 10 | 0 | NaN | 1 | 0 | 17 | 6 | 0 | 11 | 4 | 21155 | 9828 | 11327 | 28563 | 14680 | 13883 | 120381 | 60040 | 60341 | 29529 | 9083 | 20446 | 13087 | 6645 | 6442 | 13670 | 7126 | 6544 | 32063 | 16513 | 15550 | 3292 | 1450 | 1842 | 24934 | 12782 | 12152 | 458.0 | 9.0 | 51.0 | 12.0 | 124.0 | 50.0 | 201.0 | 0.0 | 9.0 | 2.0 | 459.0 | 13.0 | 24.0 | 40.0 | 130.0 | 252.0 | 4 | 1.489049 | 0.984537 | 11.565624 | 0.963802 | 0.179441 | 0.236455 | 0.078090 | 0.106125 | 0.451173 | 15.623710 | 2.017080 | 14.317640 | 4.291432 | 51.497190 | 4.0 | 3.816045 | 5.271136 | 4 | 0.131597 | 1.579164 | 1.200336 | 2.677824 | 14.606501 | 17.457198 | 18.309433 | 19.487005 | 2.677824 | 1 | 2.780449 | 17 | 1.999265 | 17.478380 | 83 | 6.734618 | 1 | 27.667863 | 9.522538 | 6.348716 | 1.767612 | 11.178333 | 0.583025 | 27.892717 | 0.811275 | 0.623484 | 1.950317 | 6.483172 | 7.385521 | 4.923843 | 3.549558 | 8.789894 | 2.163735 | 10.903161 | 0.622272 | 0.599914 | 0.934273 | 0.892674 | 0.236455 | 1.031777 | 1.561505 | 15.300449 | 16.990677 | 16.041521 | 5.029696 | 0.465820 | 17.36 | 0.57 | 0 | 0 | 0 | 0 | 2 | 1000.00 | 1500.00 | 1250.00 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 19.25 | 10.35 | 1 | 11000 | 6 | 80780 | 12 | 658.33 | 1083.33 | 870.83 | 0 | 3 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 28.38 | 6.57 | 2 | 11000 | 7 | 89492 | 23 | 673.91 | 1130.43 | 902.17 | 0 | 5 | 9 | 8 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | 2 | 32.29 | 5.73 | 2 | 11000 | 7 | 89492 | 25 | 660.00 | 1120.00 | 890.00 | 0 | 5 | 11 | 8 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 13 | 2 | 20.79 | 3.57 | 4 | 167000 | 12 | 205756 | 32 | 718.75 | 1218.75 | 968.75 | 0 | 5 | 14 | 10 | 3 | 0 | 0 | 1 | 2 | 0 | 0 | 18 | 3 | 14.18 | 3.89 | 8 | 244166 | 22 | 942180 | 61 | 931.58 | 1552.63 | 1242.11 | 4 | 7 | 21 | 15 | 11 | 2 | 1 | 4 | 4 | 0 | 0 | 26 | 3 | 13100000 |
| 4 | 5 | 77 | 77.0 | 4.0 | NaN | NaN | NaN | NaN | NaN | NaN | 8.398461e+06 | 108171 | 0.015234 | 0.037316 | 5706 | 3240.0 | 7 | 6748 | 7770.0 | 9 | 0 | 562.0 | 4 | 2 | 25 | 2 | 0 | 10 | 93 | 28179 | 13522 | 14657 | 13368 | 7159 | 6209 | 68043 | 34236 | 33807 | 26760 | 8563 | 18197 | 5706 | 2982 | 2724 | 6748 | 3664 | 3084 | 15237 | 8113 | 7124 | 5164 | 2583 | 2581 | 11631 | 6223 | 5408 | 746.0 | 48.0 | 0.0 | 0.0 | 643.0 | 16.0 | 35.0 | 0.0 | 3.0 | 1.0 | 746.0 | 371.0 | 114.0 | 146.0 | 62.0 | 53.0 | 5 | 1.257186 | 0.876620 | 8.266305 | 0.688859 | 0.247901 | 0.376838 | 0.258289 | 0.236214 | 0.392871 | 10.683540 | 2.936581 | 11.903910 | 0.853960 | 10.247521 | 5.0 | 1.595898 | 2.156284 | 113 | 0.071480 | 0.857764 | 0.820294 | 11.616653 | 1.721834 | 0.046810 | 0.787593 | 2.578671 | 1.721834 | 4 | 3.133531 | 10 | 0.084113 | 1.595898 | 113 | 1.423428 | 4 | 6.515857 | 8.671016 | 1.638318 | 3.632640 | 4.587917 | 2.609420 | 9.155057 | 1.969738 | 0.220288 | 2.544696 | 3.975401 | 3.610754 | 0.307915 | 1.864637 | 3.779781 | 1.121703 | 0.991683 | 0.892668 | 0.429052 | 0.077901 | 0.810801 | 0.376838 | 0.378756 | 0.121681 | 2.584370 | 1.112486 | 1.800125 | 1.339652 | 0.026102 | 3.56 | 4.44 | 15 | 293699 | 1 | 45000 | 48 | 702.22 | 1166.67 | 934.44 | 3 | 17 | 10 | 11 | 7 | 0 | 0 | 1 | 4 | 0 | 2 | 3 | 0 | 3.34 | 8.29 | 46 | 420952 | 3 | 158200 | 153 | 763.45 | 1272.41 | 1017.93 | 8 | 39 | 45 | 39 | 19 | 2 | 1 | 7 | 12 | 0 | 6 | 7 | 0 | 4.12 | 4.83 | 93 | 1195735 | 9 | 445900 | 272 | 766.80 | 1272.73 | 1019.76 | 19 | 70 | 74 | 72 | 30 | 6 | 1 | 18 | 30 | 0 | 10 | 14 | 2 | 4.53 | 5.02 | 149 | 1625130 | 17 | 564843 | 483 | 765.93 | 1269.23 | 1017.58 | 28 | 130 | 129 | 131 | 50 | 14 | 1 | 35 | 61 | 0 | 17 | 21 | 3 | 5.06 | 8.62 | 305 | 3420907 | 60 | 2296870 | 1068 | 853.03 | 1410.45 | 1131.74 | 63 | 266 | 267 | 262 | 149 | 57 | 4 | 70 | 121 | 1 | 40 | 77 | 5 | 8.38 | 10.92 | 689 | 8404624 | 114 | 3503058 | 2283 | 853.88 | 1411.45 | 1132.66 | 143 | 566 | 578 | 552 | 319 | 108 | 17 | 135 | 236 | 2 | 91 | 195 | 14 | 16331452 |
year_feature = [feature for feature in numerical_features if 'Yr' in feature or 'Year' in feature or "date" in feature]
year_feature
['raion_build_count_with_builddate_info']
train_data["raion_build_count_with_builddate_info"]
0 211.0
1 244.0
2 330.0
3 459.0
4 746.0
...
30466 282.0
30467 650.0
30468 NaN
30469 186.0
30470 303.0
Name: raion_build_count_with_builddate_info, Length: 30471, dtype: float64
discrete_feature=[feature for feature in numerical_features if len(train_data[feature].unique())<25 and feature not in year_feature+['id']]
print("Discrete Variables Count: {}".format(len(discrete_feature)))
Discrete Variables Count: 46
for feature in discrete_feature:
data=train_data.copy()
data.groupby(feature)['price_doc'].median().plot.bar()
plt.xlabel(feature)
plt.ylabel('price_doc')
plt.title(feature)
plt.show()
continuous_feature=[feature for feature in numerical_features if feature not in discrete_feature+year_feature+['id']]
print("Continuous feature Count {}".format(len(continuous_feature)))
Continuous feature Count 228
for feature in continuous_feature:
data=train_data.copy()
data[feature].hist(bins=25)
plt.xlabel(feature)
plt.ylabel("Count")
plt.title(feature)
plt.show()
for feature in continuous_feature:
data=train_data.copy()
if 0 in data[feature].unique():
pass
else:
data[feature]=np.log(data[feature])
data['price_doc']=np.log(data['price_doc'])
plt.scatter(data[feature],data['price_doc'])
plt.xlabel(feature)
plt.ylabel('price_doc')
plt.title(feature)
plt.show()
for feature in continuous_feature:
data=train_data.copy()
if 0 in data[feature].unique():
pass
else:
data[feature]=np.log(data[feature])
data.boxplot(column=feature)
plt.ylabel(feature)
plt.title(feature)
plt.show()
categorical_features=[feature for feature in train_data.columns if train_data[feature].dtypes=='O']
len(categorical_features)
16
for feature in categorical_features:
print('The feature is {} and number of categories are {}'.format(feature,len(train_data[feature].unique())))
The feature is timestamp and number of categories are 1161 The feature is product_type and number of categories are 2 The feature is sub_area and number of categories are 146 The feature is culture_objects_top_25 and number of categories are 2 The feature is thermal_power_plant_raion and number of categories are 2 The feature is incineration_raion and number of categories are 2 The feature is oil_chemistry_raion and number of categories are 2 The feature is radiation_raion and number of categories are 2 The feature is railroad_terminal_raion and number of categories are 2 The feature is big_market_raion and number of categories are 2 The feature is nuclear_reactor_raion and number of categories are 2 The feature is detention_facility_raion and number of categories are 2 The feature is water_1line and number of categories are 2 The feature is big_road1_1line and number of categories are 2 The feature is railroad_1line and number of categories are 2 The feature is ecology and number of categories are 5
for feature in categorical_features:
data=train_data.copy()
data.groupby(feature)['price_doc'].median().plot.bar()
plt.xlabel(feature)
plt.ylabel('price_doc')
plt.title(feature)
plt.show()
train_data["timestamp"]
0 2011-08-20
1 2011-08-23
2 2011-08-27
3 2011-09-01
4 2011-09-05
...
30466 2015-06-30
30467 2015-06-30
30468 2015-06-30
30469 2015-06-30
30470 2015-06-30
Name: timestamp, Length: 30471, dtype: object
train_data['date']=pd.to_datetime(train_data.timestamp)
train_data.drop("timestamp",axis=1,inplace=True)
train_data['year']=train_data.date.dt.year
train_data['month']=train_data.date.dt.month
train_data['day']=train_data.date.dt.day
train_data.drop("date",axis=1,inplace=True)
train_data.dtypes
id int64 full_sq int64 life_sq float64 floor float64 max_floor float64 material float64 build_year float64 num_room float64 kitch_sq float64 state float64 product_type object sub_area object area_m float64 raion_popul int64 green_zone_part float64 indust_part float64 children_preschool int64 preschool_quota float64 preschool_education_centers_raion int64 children_school int64 school_quota float64 school_education_centers_raion int64 school_education_centers_top_20_raion int64 hospital_beds_raion float64 healthcare_centers_raion int64 university_top_20_raion int64 sport_objects_raion int64 additional_education_raion int64 culture_objects_top_25 object culture_objects_top_25_raion int64 shopping_centers_raion int64 office_raion int64 thermal_power_plant_raion object incineration_raion object oil_chemistry_raion object radiation_raion object railroad_terminal_raion object big_market_raion object nuclear_reactor_raion object detention_facility_raion object full_all int64 male_f int64 female_f int64 young_all int64 young_male int64 young_female int64 work_all int64 work_male int64 work_female int64 ekder_all int64 ekder_male int64 ekder_female int64 0_6_all int64 0_6_male int64 0_6_female int64 7_14_all int64 7_14_male int64 7_14_female int64 0_17_all int64 0_17_male int64 0_17_female int64 16_29_all int64 16_29_male int64 16_29_female int64 0_13_all int64 0_13_male int64 0_13_female int64 raion_build_count_with_material_info float64 build_count_block float64 build_count_wood float64 build_count_frame float64 build_count_brick float64 build_count_monolith float64 build_count_panel float64 build_count_foam float64 build_count_slag float64 build_count_mix float64 raion_build_count_with_builddate_info float64 build_count_before_1920 float64 build_count_1921-1945 float64 build_count_1946-1970 float64 build_count_1971-1995 float64 build_count_after_1995 float64 ID_metro int64 metro_min_avto float64 metro_km_avto float64 metro_min_walk float64 metro_km_walk float64 kindergarten_km float64 school_km float64 park_km float64 green_zone_km float64 industrial_km float64 water_treatment_km float64 cemetery_km float64 incineration_km float64 railroad_station_walk_km float64 railroad_station_walk_min float64 ID_railroad_station_walk float64 railroad_station_avto_km float64 railroad_station_avto_min float64 ID_railroad_station_avto int64 public_transport_station_km float64 public_transport_station_min_walk float64 water_km float64 water_1line object mkad_km float64 ttk_km float64 sadovoe_km float64 bulvar_ring_km float64 kremlin_km float64 big_road1_km float64 ID_big_road1 int64 big_road1_1line object big_road2_km float64 ID_big_road2 int64 railroad_km float64 railroad_1line object zd_vokzaly_avto_km float64 ID_railroad_terminal int64 bus_terminal_avto_km float64 ID_bus_terminal int64 oil_chemistry_km float64 nuclear_reactor_km float64 radiation_km float64 power_transmission_line_km float64 thermal_power_plant_km float64 ts_km float64 big_market_km float64 market_shop_km float64 fitness_km float64 swim_pool_km float64 ice_rink_km float64 stadium_km float64 basketball_km float64 hospice_morgue_km float64 detention_facility_km float64 public_healthcare_km float64 university_km float64 workplaces_km float64 shopping_centers_km float64 office_km float64 additional_education_km float64 preschool_km float64 big_church_km float64 church_synagogue_km float64 mosque_km float64 theater_km float64 museum_km float64 exhibition_km float64 catering_km float64 ecology object green_part_500 float64 prom_part_500 float64 office_count_500 int64 office_sqm_500 int64 trc_count_500 int64 trc_sqm_500 int64 cafe_count_500 int64 cafe_sum_500_min_price_avg float64 cafe_sum_500_max_price_avg float64 cafe_avg_price_500 float64 cafe_count_500_na_price int64 cafe_count_500_price_500 int64 cafe_count_500_price_1000 int64 cafe_count_500_price_1500 int64 cafe_count_500_price_2500 int64 cafe_count_500_price_4000 int64 cafe_count_500_price_high int64 big_church_count_500 int64 church_count_500 int64 mosque_count_500 int64 leisure_count_500 int64 sport_count_500 int64 market_count_500 int64 green_part_1000 float64 prom_part_1000 float64 office_count_1000 int64 office_sqm_1000 int64 trc_count_1000 int64 trc_sqm_1000 int64 cafe_count_1000 int64 cafe_sum_1000_min_price_avg float64 cafe_sum_1000_max_price_avg float64 cafe_avg_price_1000 float64 cafe_count_1000_na_price int64 cafe_count_1000_price_500 int64 cafe_count_1000_price_1000 int64 cafe_count_1000_price_1500 int64 cafe_count_1000_price_2500 int64 cafe_count_1000_price_4000 int64 cafe_count_1000_price_high int64 big_church_count_1000 int64 church_count_1000 int64 mosque_count_1000 int64 leisure_count_1000 int64 sport_count_1000 int64 market_count_1000 int64 green_part_1500 float64 prom_part_1500 float64 office_count_1500 int64 office_sqm_1500 int64 trc_count_1500 int64 trc_sqm_1500 int64 cafe_count_1500 int64 cafe_sum_1500_min_price_avg float64 cafe_sum_1500_max_price_avg float64 cafe_avg_price_1500 float64 cafe_count_1500_na_price int64 cafe_count_1500_price_500 int64 cafe_count_1500_price_1000 int64 cafe_count_1500_price_1500 int64 cafe_count_1500_price_2500 int64 cafe_count_1500_price_4000 int64 cafe_count_1500_price_high int64 big_church_count_1500 int64 church_count_1500 int64 mosque_count_1500 int64 leisure_count_1500 int64 sport_count_1500 int64 market_count_1500 int64 green_part_2000 float64 prom_part_2000 float64 office_count_2000 int64 office_sqm_2000 int64 trc_count_2000 int64 trc_sqm_2000 int64 cafe_count_2000 int64 cafe_sum_2000_min_price_avg float64 cafe_sum_2000_max_price_avg float64 cafe_avg_price_2000 float64 cafe_count_2000_na_price int64 cafe_count_2000_price_500 int64 cafe_count_2000_price_1000 int64 cafe_count_2000_price_1500 int64 cafe_count_2000_price_2500 int64 cafe_count_2000_price_4000 int64 cafe_count_2000_price_high int64 big_church_count_2000 int64 church_count_2000 int64 mosque_count_2000 int64 leisure_count_2000 int64 sport_count_2000 int64 market_count_2000 int64 green_part_3000 float64 prom_part_3000 float64 office_count_3000 int64 office_sqm_3000 int64 trc_count_3000 int64 trc_sqm_3000 int64 cafe_count_3000 int64 cafe_sum_3000_min_price_avg float64 cafe_sum_3000_max_price_avg float64 cafe_avg_price_3000 float64 cafe_count_3000_na_price int64 cafe_count_3000_price_500 int64 cafe_count_3000_price_1000 int64 cafe_count_3000_price_1500 int64 cafe_count_3000_price_2500 int64 cafe_count_3000_price_4000 int64 cafe_count_3000_price_high int64 big_church_count_3000 int64 church_count_3000 int64 mosque_count_3000 int64 leisure_count_3000 int64 sport_count_3000 int64 market_count_3000 int64 green_part_5000 float64 prom_part_5000 float64 office_count_5000 int64 office_sqm_5000 int64 trc_count_5000 int64 trc_sqm_5000 int64 cafe_count_5000 int64 cafe_sum_5000_min_price_avg float64 cafe_sum_5000_max_price_avg float64 cafe_avg_price_5000 float64 cafe_count_5000_na_price int64 cafe_count_5000_price_500 int64 cafe_count_5000_price_1000 int64 cafe_count_5000_price_1500 int64 cafe_count_5000_price_2500 int64 cafe_count_5000_price_4000 int64 cafe_count_5000_price_high int64 big_church_count_5000 int64 church_count_5000 int64 mosque_count_5000 int64 leisure_count_5000 int64 sport_count_5000 int64 market_count_5000 int64 price_doc int64 year int64 month int64 day int64 dtype: object
test_data=pd.read_csv("test_data.csv")
test_data["timestamp"][0]
'2015-07-01'
test_data['date']=pd.to_datetime(test_data.timestamp)
test_data.drop("timestamp",axis=1,inplace=True)
test_data['year']=test_data.date.dt.year
test_data['month']=test_data.date.dt.month
test_data['day']=test_data.date.dt.day
test_data.drop("date",axis=1,inplace=True)
test_data.dtypes
id int64 full_sq float64 life_sq float64 floor int64 max_floor int64 material int64 build_year float64 num_room int64 kitch_sq float64 state float64 product_type object sub_area object area_m float64 raion_popul int64 green_zone_part float64 indust_part float64 children_preschool int64 preschool_quota float64 preschool_education_centers_raion int64 children_school int64 school_quota float64 school_education_centers_raion int64 school_education_centers_top_20_raion int64 hospital_beds_raion float64 healthcare_centers_raion int64 university_top_20_raion int64 sport_objects_raion int64 additional_education_raion int64 culture_objects_top_25 object culture_objects_top_25_raion int64 shopping_centers_raion int64 office_raion int64 thermal_power_plant_raion object incineration_raion object oil_chemistry_raion object radiation_raion object railroad_terminal_raion object big_market_raion object nuclear_reactor_raion object detention_facility_raion object full_all int64 male_f int64 female_f int64 young_all int64 young_male int64 young_female int64 work_all int64 work_male int64 work_female int64 ekder_all int64 ekder_male int64 ekder_female int64 0_6_all int64 0_6_male int64 0_6_female int64 7_14_all int64 7_14_male int64 7_14_female int64 0_17_all int64 0_17_male int64 0_17_female int64 16_29_all int64 16_29_male int64 16_29_female int64 0_13_all int64 0_13_male int64 0_13_female int64 raion_build_count_with_material_info float64 build_count_block float64 build_count_wood float64 build_count_frame float64 build_count_brick float64 build_count_monolith float64 build_count_panel float64 build_count_foam float64 build_count_slag float64 build_count_mix float64 raion_build_count_with_builddate_info float64 build_count_before_1920 float64 build_count_1921-1945 float64 build_count_1946-1970 float64 build_count_1971-1995 float64 build_count_after_1995 float64 ID_metro int64 metro_min_avto float64 metro_km_avto float64 metro_min_walk float64 metro_km_walk float64 kindergarten_km float64 school_km float64 park_km float64 green_zone_km float64 industrial_km float64 water_treatment_km float64 cemetery_km float64 incineration_km float64 railroad_station_walk_km float64 railroad_station_walk_min float64 ID_railroad_station_walk float64 railroad_station_avto_km float64 railroad_station_avto_min float64 ID_railroad_station_avto int64 public_transport_station_km float64 public_transport_station_min_walk float64 water_km float64 water_1line object mkad_km float64 ttk_km float64 sadovoe_km float64 bulvar_ring_km float64 kremlin_km float64 big_road1_km float64 ID_big_road1 int64 big_road1_1line object big_road2_km float64 ID_big_road2 int64 railroad_km float64 railroad_1line object zd_vokzaly_avto_km float64 ID_railroad_terminal int64 bus_terminal_avto_km float64 ID_bus_terminal int64 oil_chemistry_km float64 nuclear_reactor_km float64 radiation_km float64 power_transmission_line_km float64 thermal_power_plant_km float64 ts_km float64 big_market_km float64 market_shop_km float64 fitness_km float64 swim_pool_km float64 ice_rink_km float64 stadium_km float64 basketball_km float64 hospice_morgue_km float64 detention_facility_km float64 public_healthcare_km float64 university_km float64 workplaces_km float64 shopping_centers_km float64 office_km float64 additional_education_km float64 preschool_km float64 big_church_km float64 church_synagogue_km float64 mosque_km float64 theater_km float64 museum_km float64 exhibition_km float64 catering_km float64 ecology object green_part_500 float64 prom_part_500 float64 office_count_500 int64 office_sqm_500 int64 trc_count_500 int64 trc_sqm_500 int64 cafe_count_500 int64 cafe_sum_500_min_price_avg float64 cafe_sum_500_max_price_avg float64 cafe_avg_price_500 float64 cafe_count_500_na_price int64 cafe_count_500_price_500 int64 cafe_count_500_price_1000 int64 cafe_count_500_price_1500 int64 cafe_count_500_price_2500 int64 cafe_count_500_price_4000 int64 cafe_count_500_price_high int64 big_church_count_500 int64 church_count_500 int64 mosque_count_500 int64 leisure_count_500 int64 sport_count_500 int64 market_count_500 int64 green_part_1000 float64 prom_part_1000 float64 office_count_1000 int64 office_sqm_1000 int64 trc_count_1000 int64 trc_sqm_1000 int64 cafe_count_1000 int64 cafe_sum_1000_min_price_avg float64 cafe_sum_1000_max_price_avg float64 cafe_avg_price_1000 float64 cafe_count_1000_na_price int64 cafe_count_1000_price_500 int64 cafe_count_1000_price_1000 int64 cafe_count_1000_price_1500 int64 cafe_count_1000_price_2500 int64 cafe_count_1000_price_4000 int64 cafe_count_1000_price_high int64 big_church_count_1000 int64 church_count_1000 int64 mosque_count_1000 int64 leisure_count_1000 int64 sport_count_1000 int64 market_count_1000 int64 green_part_1500 float64 prom_part_1500 float64 office_count_1500 int64 office_sqm_1500 int64 trc_count_1500 int64 trc_sqm_1500 int64 cafe_count_1500 int64 cafe_sum_1500_min_price_avg float64 cafe_sum_1500_max_price_avg float64 cafe_avg_price_1500 float64 cafe_count_1500_na_price int64 cafe_count_1500_price_500 int64 cafe_count_1500_price_1000 int64 cafe_count_1500_price_1500 int64 cafe_count_1500_price_2500 int64 cafe_count_1500_price_4000 int64 cafe_count_1500_price_high int64 big_church_count_1500 int64 church_count_1500 int64 mosque_count_1500 int64 leisure_count_1500 int64 sport_count_1500 int64 market_count_1500 int64 green_part_2000 float64 prom_part_2000 float64 office_count_2000 int64 office_sqm_2000 int64 trc_count_2000 int64 trc_sqm_2000 int64 cafe_count_2000 int64 cafe_sum_2000_min_price_avg float64 cafe_sum_2000_max_price_avg float64 cafe_avg_price_2000 float64 cafe_count_2000_na_price int64 cafe_count_2000_price_500 int64 cafe_count_2000_price_1000 int64 cafe_count_2000_price_1500 int64 cafe_count_2000_price_2500 int64 cafe_count_2000_price_4000 int64 cafe_count_2000_price_high int64 big_church_count_2000 int64 church_count_2000 int64 mosque_count_2000 int64 leisure_count_2000 int64 sport_count_2000 int64 market_count_2000 int64 green_part_3000 float64 prom_part_3000 float64 office_count_3000 int64 office_sqm_3000 int64 trc_count_3000 int64 trc_sqm_3000 int64 cafe_count_3000 int64 cafe_sum_3000_min_price_avg float64 cafe_sum_3000_max_price_avg float64 cafe_avg_price_3000 float64 cafe_count_3000_na_price int64 cafe_count_3000_price_500 int64 cafe_count_3000_price_1000 int64 cafe_count_3000_price_1500 int64 cafe_count_3000_price_2500 int64 cafe_count_3000_price_4000 int64 cafe_count_3000_price_high int64 big_church_count_3000 int64 church_count_3000 int64 mosque_count_3000 int64 leisure_count_3000 int64 sport_count_3000 int64 market_count_3000 int64 green_part_5000 float64 prom_part_5000 float64 office_count_5000 int64 office_sqm_5000 int64 trc_count_5000 int64 trc_sqm_5000 int64 cafe_count_5000 int64 cafe_sum_5000_min_price_avg float64 cafe_sum_5000_max_price_avg float64 cafe_avg_price_5000 float64 cafe_count_5000_na_price int64 cafe_count_5000_price_500 int64 cafe_count_5000_price_1000 int64 cafe_count_5000_price_1500 int64 cafe_count_5000_price_2500 int64 cafe_count_5000_price_4000 int64 cafe_count_5000_price_high int64 big_church_count_5000 int64 church_count_5000 int64 mosque_count_5000 int64 leisure_count_5000 int64 sport_count_5000 int64 market_count_5000 int64 year int64 month int64 day int64 dtype: object
train_data.shape
(30471, 294)
test_data.shape
(7662, 293)
features_nan=[feature for feature in train_data.columns if train_data[feature].isnull().sum()>1 and train_data[feature].dtypes=='O']
for feature in features_nan:
print("{}: {}% missing values".format(feature,np.round(train_data[feature].isnull().mean(),4)))
features_nan
[]
numerical_with_nan=[feature for feature in train_data.columns if train_data[feature].isnull().sum()>1 and train_data[feature].dtypes!='O']
for feature in numerical_with_nan:
print("{}: {}% missing value".format(feature,np.around(train_data[feature].isnull().mean(),4)))
life_sq: 0.2095% missing value floor: 0.0055% missing value max_floor: 0.3141% missing value material: 0.3141% missing value build_year: 0.4465% missing value num_room: 0.3141% missing value kitch_sq: 0.3141% missing value state: 0.445% missing value preschool_quota: 0.2195% missing value school_quota: 0.2194% missing value hospital_beds_raion: 0.4739% missing value raion_build_count_with_material_info: 0.1638% missing value build_count_block: 0.1638% missing value build_count_wood: 0.1638% missing value build_count_frame: 0.1638% missing value build_count_brick: 0.1638% missing value build_count_monolith: 0.1638% missing value build_count_panel: 0.1638% missing value build_count_foam: 0.1638% missing value build_count_slag: 0.1638% missing value build_count_mix: 0.1638% missing value raion_build_count_with_builddate_info: 0.1638% missing value build_count_before_1920: 0.1638% missing value build_count_1921-1945: 0.1638% missing value build_count_1946-1970: 0.1638% missing value build_count_1971-1995: 0.1638% missing value build_count_after_1995: 0.1638% missing value metro_min_walk: 0.0008% missing value metro_km_walk: 0.0008% missing value railroad_station_walk_km: 0.0008% missing value railroad_station_walk_min: 0.0008% missing value ID_railroad_station_walk: 0.0008% missing value cafe_sum_500_min_price_avg: 0.4359% missing value cafe_sum_500_max_price_avg: 0.4359% missing value cafe_avg_price_500: 0.4359% missing value cafe_sum_1000_min_price_avg: 0.2141% missing value cafe_sum_1000_max_price_avg: 0.2141% missing value cafe_avg_price_1000: 0.2141% missing value cafe_sum_1500_min_price_avg: 0.1378% missing value cafe_sum_1500_max_price_avg: 0.1378% missing value cafe_avg_price_1500: 0.1378% missing value cafe_sum_2000_min_price_avg: 0.0566% missing value cafe_sum_2000_max_price_avg: 0.0566% missing value cafe_avg_price_2000: 0.0566% missing value cafe_sum_3000_min_price_avg: 0.0325% missing value cafe_sum_3000_max_price_avg: 0.0325% missing value cafe_avg_price_3000: 0.0325% missing value prom_part_5000: 0.0058% missing value cafe_sum_5000_min_price_avg: 0.0097% missing value cafe_sum_5000_max_price_avg: 0.0097% missing value cafe_avg_price_5000: 0.0097% missing value
for feature in numerical_with_nan:
## We will replace by using median since there are outliers
median_value=train_data[feature].median()
## create a new feature to capture nan values
train_data[feature+'nan']=np.where(train_data[feature].isnull(),1,0)
train_data[feature].fillna(median_value,inplace=True)
train_data[numerical_with_nan].isnull().sum()
life_sq 0 floor 0 max_floor 0 material 0 build_year 0 num_room 0 kitch_sq 0 state 0 preschool_quota 0 school_quota 0 hospital_beds_raion 0 raion_build_count_with_material_info 0 build_count_block 0 build_count_wood 0 build_count_frame 0 build_count_brick 0 build_count_monolith 0 build_count_panel 0 build_count_foam 0 build_count_slag 0 build_count_mix 0 raion_build_count_with_builddate_info 0 build_count_before_1920 0 build_count_1921-1945 0 build_count_1946-1970 0 build_count_1971-1995 0 build_count_after_1995 0 metro_min_walk 0 metro_km_walk 0 railroad_station_walk_km 0 railroad_station_walk_min 0 ID_railroad_station_walk 0 cafe_sum_500_min_price_avg 0 cafe_sum_500_max_price_avg 0 cafe_avg_price_500 0 cafe_sum_1000_min_price_avg 0 cafe_sum_1000_max_price_avg 0 cafe_avg_price_1000 0 cafe_sum_1500_min_price_avg 0 cafe_sum_1500_max_price_avg 0 cafe_avg_price_1500 0 cafe_sum_2000_min_price_avg 0 cafe_sum_2000_max_price_avg 0 cafe_avg_price_2000 0 cafe_sum_3000_min_price_avg 0 cafe_sum_3000_max_price_avg 0 cafe_avg_price_3000 0 prom_part_5000 0 cafe_sum_5000_min_price_avg 0 cafe_sum_5000_max_price_avg 0 cafe_avg_price_5000 0 dtype: int64
train_data.dtypes
id int64
full_sq int64
life_sq float64
floor float64
max_floor float64
...
cafe_avg_price_3000nan int32
prom_part_5000nan int32
cafe_sum_5000_min_price_avgnan int32
cafe_sum_5000_max_price_avgnan int32
cafe_avg_price_5000nan int32
Length: 345, dtype: object
features_nan_test=[feature for feature in test_data.columns if test_data[feature].isnull().sum()>1 and test_data[feature].dtypes=='O']
for feature in features_nan_test:
print("{}: {}% missing values".format(feature,np.round(test_data[feature].isnull().mean(),4)))
product_type: 0.0043% missing values
def replace_cat_feature_test(test_data,features_nan_test):
data=test_data.copy()
data[features_nan_test]=data[features_nan_test].fillna('Missing')
return data
test_data=replace_cat_feature_test(test_data,features_nan_test)
test_data[features_nan_test].isnull().sum()
product_type 0 dtype: int64
numerical_with_nan_test=[feature for feature in test_data.columns if test_data[feature].isnull().sum()>1 and test_data[feature].dtypes!='O']
for feature in numerical_with_nan_test:
print("{}: {}% missing value".format(feature,np.around(test_data[feature].isnull().mean(),4)))
life_sq: 0.1535% missing value build_year: 0.1369% missing value state: 0.0906% missing value preschool_quota: 0.2083% missing value school_quota: 0.2082% missing value hospital_beds_raion: 0.4461% missing value raion_build_count_with_material_info: 0.159% missing value build_count_block: 0.159% missing value build_count_wood: 0.159% missing value build_count_frame: 0.159% missing value build_count_brick: 0.159% missing value build_count_monolith: 0.159% missing value build_count_panel: 0.159% missing value build_count_foam: 0.159% missing value build_count_slag: 0.159% missing value build_count_mix: 0.159% missing value raion_build_count_with_builddate_info: 0.159% missing value build_count_before_1920: 0.159% missing value build_count_1921-1945: 0.159% missing value build_count_1946-1970: 0.159% missing value build_count_1971-1995: 0.159% missing value build_count_after_1995: 0.159% missing value metro_min_walk: 0.0044% missing value metro_km_walk: 0.0044% missing value railroad_station_walk_km: 0.0044% missing value railroad_station_walk_min: 0.0044% missing value ID_railroad_station_walk: 0.0044% missing value cafe_sum_500_min_price_avg: 0.4123% missing value cafe_sum_500_max_price_avg: 0.4123% missing value cafe_avg_price_500: 0.4123% missing value cafe_sum_1000_min_price_avg: 0.1595% missing value cafe_sum_1000_max_price_avg: 0.1595% missing value cafe_avg_price_1000: 0.1595% missing value cafe_sum_1500_min_price_avg: 0.1072% missing value cafe_sum_1500_max_price_avg: 0.1072% missing value cafe_avg_price_1500: 0.1072% missing value green_part_2000: 0.0025% missing value cafe_sum_2000_min_price_avg: 0.0553% missing value cafe_sum_2000_max_price_avg: 0.0553% missing value cafe_avg_price_2000: 0.0553% missing value cafe_sum_3000_min_price_avg: 0.0238% missing value cafe_sum_3000_max_price_avg: 0.0238% missing value cafe_avg_price_3000: 0.0238% missing value prom_part_5000: 0.012% missing value cafe_sum_5000_min_price_avg: 0.0167% missing value cafe_sum_5000_max_price_avg: 0.0167% missing value cafe_avg_price_5000: 0.0167% missing value
for feature in numerical_with_nan_test:
## We will replace by using median since there are outliers
median_value=test_data[feature].median()
## create a new feature to capture nan values
test_data[feature+'nan']=np.where(test_data[feature].isnull(),1,0)
test_data[feature].fillna(median_value,inplace=True)
test_data[numerical_with_nan_test].isnull().sum()
life_sq 0 build_year 0 state 0 preschool_quota 0 school_quota 0 hospital_beds_raion 0 raion_build_count_with_material_info 0 build_count_block 0 build_count_wood 0 build_count_frame 0 build_count_brick 0 build_count_monolith 0 build_count_panel 0 build_count_foam 0 build_count_slag 0 build_count_mix 0 raion_build_count_with_builddate_info 0 build_count_before_1920 0 build_count_1921-1945 0 build_count_1946-1970 0 build_count_1971-1995 0 build_count_after_1995 0 metro_min_walk 0 metro_km_walk 0 railroad_station_walk_km 0 railroad_station_walk_min 0 ID_railroad_station_walk 0 cafe_sum_500_min_price_avg 0 cafe_sum_500_max_price_avg 0 cafe_avg_price_500 0 cafe_sum_1000_min_price_avg 0 cafe_sum_1000_max_price_avg 0 cafe_avg_price_1000 0 cafe_sum_1500_min_price_avg 0 cafe_sum_1500_max_price_avg 0 cafe_avg_price_1500 0 green_part_2000 0 cafe_sum_2000_min_price_avg 0 cafe_sum_2000_max_price_avg 0 cafe_avg_price_2000 0 cafe_sum_3000_min_price_avg 0 cafe_sum_3000_max_price_avg 0 cafe_avg_price_3000 0 prom_part_5000 0 cafe_sum_5000_min_price_avg 0 cafe_sum_5000_max_price_avg 0 cafe_avg_price_5000 0 dtype: int64
test_data.shape
(7662, 340)
train_data.shape
(30471, 345)
numerical_features_test= [feature for feature in test_data.columns if test_data[feature].dtypes != 'O']
print('Number of numerical variables: ', len(numerical_features_test))
test_data[numerical_features_test].head()
Number of numerical variables: 325
| id | full_sq | life_sq | floor | max_floor | material | build_year | num_room | kitch_sq | state | area_m | raion_popul | green_zone_part | indust_part | children_preschool | preschool_quota | preschool_education_centers_raion | children_school | school_quota | school_education_centers_raion | school_education_centers_top_20_raion | hospital_beds_raion | healthcare_centers_raion | university_top_20_raion | sport_objects_raion | additional_education_raion | culture_objects_top_25_raion | shopping_centers_raion | office_raion | full_all | male_f | female_f | young_all | young_male | young_female | work_all | work_male | work_female | ekder_all | ekder_male | ekder_female | 0_6_all | 0_6_male | 0_6_female | 7_14_all | 7_14_male | 7_14_female | 0_17_all | 0_17_male | 0_17_female | 16_29_all | 16_29_male | 16_29_female | 0_13_all | 0_13_male | 0_13_female | raion_build_count_with_material_info | build_count_block | build_count_wood | build_count_frame | build_count_brick | build_count_monolith | build_count_panel | build_count_foam | build_count_slag | build_count_mix | raion_build_count_with_builddate_info | build_count_before_1920 | build_count_1921-1945 | build_count_1946-1970 | build_count_1971-1995 | build_count_after_1995 | ID_metro | metro_min_avto | metro_km_avto | metro_min_walk | metro_km_walk | kindergarten_km | school_km | park_km | green_zone_km | industrial_km | water_treatment_km | cemetery_km | incineration_km | railroad_station_walk_km | railroad_station_walk_min | ID_railroad_station_walk | railroad_station_avto_km | railroad_station_avto_min | ID_railroad_station_avto | public_transport_station_km | public_transport_station_min_walk | water_km | mkad_km | ttk_km | sadovoe_km | bulvar_ring_km | kremlin_km | big_road1_km | ID_big_road1 | big_road2_km | ID_big_road2 | railroad_km | zd_vokzaly_avto_km | ID_railroad_terminal | bus_terminal_avto_km | ID_bus_terminal | oil_chemistry_km | nuclear_reactor_km | radiation_km | power_transmission_line_km | thermal_power_plant_km | ts_km | big_market_km | market_shop_km | fitness_km | swim_pool_km | ice_rink_km | stadium_km | basketball_km | hospice_morgue_km | detention_facility_km | public_healthcare_km | university_km | workplaces_km | shopping_centers_km | office_km | additional_education_km | preschool_km | big_church_km | church_synagogue_km | mosque_km | theater_km | museum_km | exhibition_km | catering_km | green_part_500 | prom_part_500 | office_count_500 | office_sqm_500 | trc_count_500 | trc_sqm_500 | cafe_count_500 | cafe_sum_500_min_price_avg | cafe_sum_500_max_price_avg | cafe_avg_price_500 | cafe_count_500_na_price | cafe_count_500_price_500 | cafe_count_500_price_1000 | ... | cafe_count_1000_price_4000 | cafe_count_1000_price_high | big_church_count_1000 | church_count_1000 | mosque_count_1000 | leisure_count_1000 | sport_count_1000 | market_count_1000 | green_part_1500 | prom_part_1500 | office_count_1500 | office_sqm_1500 | trc_count_1500 | trc_sqm_1500 | cafe_count_1500 | cafe_sum_1500_min_price_avg | cafe_sum_1500_max_price_avg | cafe_avg_price_1500 | cafe_count_1500_na_price | cafe_count_1500_price_500 | cafe_count_1500_price_1000 | cafe_count_1500_price_1500 | cafe_count_1500_price_2500 | cafe_count_1500_price_4000 | cafe_count_1500_price_high | big_church_count_1500 | church_count_1500 | mosque_count_1500 | leisure_count_1500 | sport_count_1500 | market_count_1500 | green_part_2000 | prom_part_2000 | office_count_2000 | office_sqm_2000 | trc_count_2000 | trc_sqm_2000 | cafe_count_2000 | cafe_sum_2000_min_price_avg | cafe_sum_2000_max_price_avg | cafe_avg_price_2000 | cafe_count_2000_na_price | cafe_count_2000_price_500 | cafe_count_2000_price_1000 | cafe_count_2000_price_1500 | cafe_count_2000_price_2500 | cafe_count_2000_price_4000 | cafe_count_2000_price_high | big_church_count_2000 | church_count_2000 | mosque_count_2000 | leisure_count_2000 | sport_count_2000 | market_count_2000 | green_part_3000 | prom_part_3000 | office_count_3000 | office_sqm_3000 | trc_count_3000 | trc_sqm_3000 | cafe_count_3000 | cafe_sum_3000_min_price_avg | cafe_sum_3000_max_price_avg | cafe_avg_price_3000 | cafe_count_3000_na_price | cafe_count_3000_price_500 | cafe_count_3000_price_1000 | cafe_count_3000_price_1500 | cafe_count_3000_price_2500 | cafe_count_3000_price_4000 | cafe_count_3000_price_high | big_church_count_3000 | church_count_3000 | mosque_count_3000 | leisure_count_3000 | sport_count_3000 | market_count_3000 | green_part_5000 | prom_part_5000 | office_count_5000 | office_sqm_5000 | trc_count_5000 | trc_sqm_5000 | cafe_count_5000 | cafe_sum_5000_min_price_avg | cafe_sum_5000_max_price_avg | cafe_avg_price_5000 | cafe_count_5000_na_price | cafe_count_5000_price_500 | cafe_count_5000_price_1000 | cafe_count_5000_price_1500 | cafe_count_5000_price_2500 | cafe_count_5000_price_4000 | cafe_count_5000_price_high | big_church_count_5000 | church_count_5000 | mosque_count_5000 | leisure_count_5000 | sport_count_5000 | market_count_5000 | year | month | day | life_sqnan | build_yearnan | statenan | preschool_quotanan | school_quotanan | hospital_beds_raionnan | raion_build_count_with_material_infonan | build_count_blocknan | build_count_woodnan | build_count_framenan | build_count_bricknan | build_count_monolithnan | build_count_panelnan | build_count_foamnan | build_count_slagnan | build_count_mixnan | raion_build_count_with_builddate_infonan | build_count_before_1920nan | build_count_1921-1945nan | build_count_1946-1970nan | build_count_1971-1995nan | build_count_after_1995nan | metro_min_walknan | metro_km_walknan | railroad_station_walk_kmnan | railroad_station_walk_minnan | ID_railroad_station_walknan | cafe_sum_500_min_price_avgnan | cafe_sum_500_max_price_avgnan | cafe_avg_price_500nan | cafe_sum_1000_min_price_avgnan | cafe_sum_1000_max_price_avgnan | cafe_avg_price_1000nan | cafe_sum_1500_min_price_avgnan | cafe_sum_1500_max_price_avgnan | cafe_avg_price_1500nan | green_part_2000nan | cafe_sum_2000_min_price_avgnan | cafe_sum_2000_max_price_avgnan | cafe_avg_price_2000nan | cafe_sum_3000_min_price_avgnan | cafe_sum_3000_max_price_avgnan | cafe_avg_price_3000nan | prom_part_5000nan | cafe_sum_5000_min_price_avgnan | cafe_sum_5000_max_price_avgnan | cafe_avg_price_5000nan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 30474 | 39.0 | 20.7 | 2 | 9 | 1 | 1998.0 | 1 | 8.9 | 3.0 | 2.615514e+07 | 178264 | 0.137846 | 0.041116 | 14080 | 11926.0 | 11 | 14892 | 24750.0 | 13 | 1 | 990.0 | 1 | 0 | 13 | 4 | 0 | 4 | 4 | 102618 | 47681 | 54937 | 30808 | 16251 | 14557 | 121369 | 59138 | 62231 | 26087 | 7410 | 18677 | 14080 | 7457 | 6623 | 14892 | 7839 | 7053 | 34341 | 18094 | 16247 | 19906 | 9676 | 10230 | 27123 | 14340 | 12783 | 1681.0 | 173.0 | 607.0 | 19.0 | 245.0 | 116.0 | 431.0 | 1.0 | 84.0 | 5.0 | 1680.0 | 34.0 | 299.0 | 439.0 | 109.0 | 799.0 | 45 | 1.258957 | 0.735908 | 8.830901 | 0.735908 | 0.078502 | 0.746962 | 2.048487 | 0.061485 | 1.205404 | 0.967696 | 0.781053 | 10.56540 | 4.812102 | 57.745220 | 39.0 | 4.850748 | 6.274963 | 39 | 0.114134 | 1.369603 | 0.248151 | 6.374826 | 19.651101 | 22.790985 | 24.079707 | 24.779082 | 4.152246 | 2 | 5.706484 | 38 | 0.490549 | 27.553486 | 32 | 8.424959 | 9 | 22.624362 | 16.224083 | 6.620081 | 4.121874 | 8.957780 | 8.824060 | 15.483912 | 5.353674 | 0.225788 | 3.673942 | 11.810839 | 20.392427 | 9.131977 | 3.300120 | 25.462741 | 1.613152 | 17.214870 | 7.922610 | 2.414138 | 4.923614 | 0.514211 | 0.746962 | 0.749142 | 0.848297 | 1.917736 | 19.953413 | 14.052207 | 12.228576 | 0.446324 | 42.22 | 0.00 | 0 | 0 | 0 | 0 | 1 | 1000.0 | 1500.00 | 1250.00 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 1 | 0 | 0 | 3 | 0 | 20.14 | 0.70 | 0 | 0 | 0 | 0 | 2 | 1000.00 | 1500.00 | 1250.00 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 4 | 0 | 15.17 | 1.18 | 0 | 0 | 0 | 0 | 3 | 1000.00 | 1500.00 | 1250.00 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 1 | 2 | 1 | 0 | 5 | 0 | 14.69 | 2.87 | 0 | 0 | 3 | 73000 | 12 | 781.82 | 1227.27 | 1004.55 | 1 | 2 | 2 | 7 | 0 | 0 | 0 | 1 | 3 | 1 | 0 | 7 | 0 | 21.58 | 4.69 | 1 | 37550 | 8 | 299166 | 19 | 676.47 | 1088.24 | 882.35 | 2 | 5 | 4 | 8 | 0 | 0 | 0 | 1 | 10 | 1 | 0 | 14 | 1 | 2015 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 30475 | 79.2 | 30.4 | 8 | 17 | 1 | 0.0 | 3 | 1.0 | 1.0 | 2.553630e+07 | 4001 | 0.496315 | 0.007122 | 275 | 3062.0 | 0 | 264 | 6974.0 | 0 | 0 | 990.0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 17790 | 8350 | 9443 | 574 | 297 | 277 | 2566 | 1356 | 1211 | 861 | 244 | 617 | 275 | 143 | 133 | 264 | 136 | 128 | 646 | 336 | 311 | 3796 | 2035 | 1762 | 506 | 261 | 245 | 295.0 | 44.0 | 1.0 | 0.0 | 84.0 | 7.0 | 92.0 | 0.0 | 0.0 | 0.0 | 295.0 | 0.0 | 2.0 | 144.0 | 73.0 | 31.0 | 21 | 4.230425 | 3.444625 | 41.335498 | 3.444625 | 1.192193 | 1.332570 | 4.400427 | 0.000000 | 0.742377 | 16.049420 | 2.244906 | 18.50054 | 5.458057 | 65.496687 | 24.0 | 5.458057 | 6.859956 | 24 | 0.826083 | 9.912993 | 0.799853 | 6.847813 | 16.975793 | 19.692960 | 20.864427 | 21.722620 | 2.148398 | 13 | 4.410488 | 27 | 2.342346 | 27.421853 | 50 | 16.913175 | 8 | 29.425443 | 17.080113 | 8.545593 | 4.932827 | 10.039833 | 3.654955 | 15.092542 | 8.156185 | 1.313180 | 4.244082 | 4.438242 | 13.445121 | 8.332180 | 7.095895 | 26.807204 | 3.775300 | 12.440198 | 9.672779 | 1.764298 | 3.764819 | 1.694967 | 1.332570 | 1.672126 | 1.162371 | 12.239901 | 13.006107 | 9.661063 | 4.323941 | 0.705873 | 86.33 | 0.00 | 0 | 0 | 0 | 0 | 0 | 700.0 | 1166.67 | 927.78 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 59.28 | 5.31 | 0 | 0 | 0 | 0 | 3 | 833.33 | 1500.00 | 1166.67 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 49.26 | 4.06 | 0 | 0 | 1 | 5000 | 7 | 757.14 | 1285.71 | 1021.43 | 0 | 1 | 3 | 2 | 1 | 0 | 0 | 1 | 2 | 0 | 0 | 1 | 0 | 39.50 | 3.32 | 0 | 0 | 2 | 22000 | 10 | 680.00 | 1200.00 | 940.00 | 0 | 1 | 6 | 2 | 1 | 0 | 0 | 1 | 5 | 0 | 0 | 7 | 0 | 39.10 | 7.70 | 2 | 177300 | 6 | 231300 | 20 | 733.33 | 1250.00 | 991.67 | 2 | 4 | 8 | 4 | 1 | 1 | 0 | 2 | 11 | 0 | 1 | 12 | 1 | 2015 | 7 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 30476 | 40.5 | 25.1 | 3 | 5 | 2 | 1960.0 | 2 | 4.8 | 2.0 | 9.946335e+06 | 139322 | 0.065409 | 0.225825 | 6400 | 2232.0 | 7 | 6558 | 7966.0 | 7 | 0 | 1548.0 | 3 | 0 | 13 | 0 | 0 | 2 | 7 | 36154 | 16222 | 19932 | 13799 | 6937 | 6862 | 91795 | 44734 | 47061 | 33728 | 9653 | 24075 | 6400 | 3209 | 3191 | 6558 | 3317 | 3241 | 15514 | 7813 | 7701 | 8137 | 3787 | 4350 | 12162 | 6117 | 6045 | 561.0 | 111.0 | 0.0 | 0.0 | 254.0 | 3.0 | 189.0 | 0.0 | 4.0 | 0.0 | 561.0 | 0.0 | 5.0 | 437.0 | 79.0 | 40.0 | 44 | 1.585306 | 1.122214 | 13.466563 | 1.122214 | 0.065324 | 0.194608 | 2.513006 | 0.580638 | 0.900408 | 11.749900 | 3.389848 | 10.19563 | 3.628293 | 43.539514 | 68.0 | 3.977659 | 5.375048 | 59 | 0.116686 | 1.400229 | 1.384824 | 3.499380 | 5.627481 | 8.090528 | 8.671086 | 10.320728 | 0.580638 | 10 | 3.499380 | 1 | 2.220941 | 10.093318 | 5 | 7.921607 | 3 | 1.823381 | 14.431252 | 0.826743 | 2.388288 | 3.760642 | 3.290966 | 16.304596 | 0.644830 | 0.966254 | 1.332737 | 3.131143 | 1.464174 | 1.499581 | 0.487817 | 6.718082 | 0.711768 | 4.862872 | 3.506298 | 1.456661 | 1.223804 | 2.330995 | 0.194608 | 1.400094 | 1.177527 | 9.938735 | 2.983875 | 1.988346 | 0.794245 | 0.320864 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 3 | 400.0 | 750.00 | 575.00 | 1 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 6 | 2 | 30.97 | 8.75 | 2 | 34100 | 1 | 0 | 19 | 655.56 | 1111.11 | 883.33 | 1 | 6 | 6 | 4 | 2 | 0 | 0 | 1 | 1 | 0 | 1 | 12 | 3 | 40.90 | 10.51 | 6 | 80237 | 3 | 14090 | 28 | 633.33 | 1092.59 | 862.96 | 1 | 7 | 12 | 6 | 2 | 0 | 0 | 2 | 2 | 0 | 4 | 14 | 4 | 45.86 | 9.08 | 8 | 215237 | 6 | 39106 | 37 | 608.33 | 1069.44 | 838.89 | 1 | 8 | 19 | 7 | 2 | 0 | 0 | 2 | 3 | 0 | 5 | 22 | 4 | 25.62 | 13.59 | 27 | 427889 | 26 | 1024431 | 179 | 668.97 | 1132.18 | 900.57 | 5 | 53 | 64 | 42 | 11 | 4 | 0 | 10 | 21 | 0 | 10 | 71 | 11 | 2015 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 30477 | 62.8 | 36.0 | 17 | 17 | 1 | 2016.0 | 2 | 62.8 | 3.0 | 2.149409e+07 | 7122 | 0.262459 | 0.017647 | 489 | 3062.0 | 0 | 469 | 6974.0 | 0 | 0 | 990.0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 9553 | 4529 | 5024 | 1021 | 529 | 493 | 4568 | 2414 | 2155 | 1533 | 435 | 1099 | 489 | 254 | 236 | 469 | 242 | 228 | 1150 | 597 | 553 | 2155 | 1206 | 950 | 900 | 465 | 435 | 295.0 | 44.0 | 1.0 | 0.0 | 84.0 | 7.0 | 92.0 | 0.0 | 0.0 | 0.0 | 295.0 | 0.0 | 2.0 | 144.0 | 73.0 | 31.0 | 45 | 7.931398 | 6.038848 | 68.559794 | 5.713316 | 3.189083 | 3.540105 | 5.612835 | 0.025446 | 0.466738 | 5.061917 | 2.701804 | 14.62944 | 10.284167 | 123.410001 | 39.0 | 10.609698 | 13.517419 | 39 | 3.093209 | 37.118504 | 0.233017 | 8.928836 | 22.094252 | 25.062928 | 26.226045 | 26.960463 | 2.722667 | 38 | 8.601110 | 2 | 4.476081 | 37.436772 | 50 | 13.979650 | 8 | 26.895118 | 19.942295 | 9.434351 | 6.218331 | 13.345715 | 10.480798 | 10.723870 | 11.112624 | 4.480234 | 8.577223 | 15.200509 | 18.560234 | 12.253021 | 6.831966 | 30.366022 | 5.731266 | 15.382678 | 11.306566 | 6.589381 | 8.102094 | 0.403429 | 3.540105 | 5.411312 | 0.213853 | 6.153091 | 18.121220 | 16.938290 | 14.171229 | 0.454087 | 22.01 | 0.15 | 0 | 0 | 0 | 0 | 1 | 300.0 | 500.00 | 400.00 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 28.82 | 3.59 | 0 | 0 | 0 | 0 | 1 | 300.00 | 500.00 | 400.00 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 31.35 | 2.99 | 0 | 0 | 0 | 0 | 1 | 300.00 | 500.00 | 400.00 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 34.87 | 1.34 | 0 | 0 | 0 | 0 | 1 | 300.00 | 500.00 | 400.00 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 24.25 | 1.66 | 0 | 0 | 0 | 0 | 5 | 1560.00 | 2500.00 | 2030.00 | 0 | 1 | 0 | 1 | 1 | 2 | 0 | 0 | 10 | 0 | 0 | 2 | 0 | 2015 | 7 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 30478 | 40.0 | 40.0 | 17 | 17 | 1 | 0.0 | 1 | 1.0 | 1.0 | 2.553630e+07 | 4001 | 0.496315 | 0.007122 | 275 | 3062.0 | 0 | 264 | 6974.0 | 0 | 0 | 990.0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 17790 | 8350 | 9443 | 574 | 297 | 277 | 2566 | 1356 | 1211 | 861 | 244 | 617 | 275 | 143 | 133 | 264 | 136 | 128 | 646 | 336 | 311 | 3796 | 2035 | 1762 | 506 | 261 | 245 | 295.0 | 44.0 | 1.0 | 0.0 | 84.0 | 7.0 | 92.0 | 0.0 | 0.0 | 0.0 | 295.0 | 0.0 | 2.0 | 144.0 | 73.0 | 31.0 | 21 | 2.152792 | 1.722233 | 20.666800 | 1.722233 | 0.897889 | 1.234235 | 4.566595 | 0.427248 | 0.353642 | 16.784630 | 2.250137 | 19.14919 | 3.735666 | 44.827989 | 24.0 | 3.735666 | 4.782323 | 24 | 0.630014 | 7.560163 | 0.394422 | 7.123215 | 17.148737 | 19.868997 | 21.038561 | 21.905792 | 2.808077 | 13 | 3.688405 | 27 | 1.727223 | 25.699461 | 50 | 17.366661 | 8 | 29.968660 | 17.397666 | 9.036942 | 5.506770 | 10.102328 | 3.729416 | 15.546028 | 6.433794 | 1.519553 | 2.521691 | 2.715850 | 13.898607 | 8.355285 | 7.401423 | 25.084813 | 2.052908 | 12.893684 | 9.479093 | 1.806570 | 4.338453 | 1.339078 | 1.234235 | 1.192543 | 1.186621 | 12.652956 | 13.459593 | 9.890758 | 4.555385 | 0.066503 | 3.33 | 3.70 | 0 | 0 | 0 | 0 | 2 | 1000.0 | 1750.00 | 1375.00 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 43.85 | 1.55 | 0 | 0 | 0 | 0 | 3 | 833.33 | 1500.00 | 1166.67 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 38.61 | 3.12 | 0 | 0 | 2 | 22000 | 7 | 757.14 | 1285.71 | 1021.43 | 0 | 1 | 3 | 2 | 1 | 0 | 0 | 1 | 2 | 0 | 0 | 3 | 0 | 41.64 | 2.11 | 0 | 0 | 2 | 22000 | 9 | 700.00 | 1222.22 | 961.11 | 0 | 1 | 5 | 2 | 1 | 0 | 0 | 1 | 4 | 0 | 0 | 6 | 0 | 35.62 | 6.96 | 1 | 117300 | 4 | 201300 | 20 | 747.37 | 1263.16 | 1005.26 | 1 | 4 | 8 | 5 | 1 | 1 | 0 | 2 | 12 | 0 | 1 | 11 | 1 | 2015 | 7 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 325 columns
year_feature_test= [feature for feature in numerical_features if 'Yr' in feature or 'Year' in feature]
year_feature_test
[]
discrete_feature_test=[feature for feature in numerical_features_test if len(test_data[feature].unique())<25 and feature not in year_feature_test+['id']]
print("Discrete Variables Count: {}".format(len(discrete_feature_test)))
Discrete Variables Count: 101
continuous_feature_test=[feature for feature in numerical_features_test if feature not in discrete_feature_test+year_feature_test+['Id']]
print("Continuous feature Count {}".format(len(continuous_feature_test)))
Continuous feature Count 224
categorical_features_test=[feature for feature in test_data.columns if test_data[feature].dtypes=='O']
len(categorical_features_test)
15
for feature in categorical_features_test:
print('The feature is {} and number of categories are {}'.format(feature,len(test_data[feature].unique())))
The feature is product_type and number of categories are 3 The feature is sub_area and number of categories are 145 The feature is culture_objects_top_25 and number of categories are 2 The feature is thermal_power_plant_raion and number of categories are 2 The feature is incineration_raion and number of categories are 2 The feature is oil_chemistry_raion and number of categories are 2 The feature is radiation_raion and number of categories are 2 The feature is railroad_terminal_raion and number of categories are 2 The feature is big_market_raion and number of categories are 2 The feature is nuclear_reactor_raion and number of categories are 2 The feature is detention_facility_raion and number of categories are 2 The feature is water_1line and number of categories are 2 The feature is big_road1_1line and number of categories are 2 The feature is railroad_1line and number of categories are 2 The feature is ecology and number of categories are 5
for feature in categorical_features_test:
test_data[feature] = test_data[feature].map((train_data.groupby(feature)['price_doc'].count()/len(train_data)).to_dict())
#train_data[feature] = train_data[feature].map((train_data.groupby(feature)['SalePrice'].count()/len(train_data)).to_dict())
categorical_features=[feature for feature in train_data.columns if train_data[feature].dtypes=='O' if feature not in ["timestamp"]]
len(categorical_features)
15
for feature in categorical_features:
temp=train_data.groupby(feature)['price_doc'].count()/len(train_data)
temp_df=temp[temp>0.01].index
train_data[feature]=np.where(train_data[feature].isin(temp_df),train_data[feature],'Rare_var')
for feature in categorical_features_test:
temp=test_data[feature]
temp_df=temp[temp>0.01].index
test_data[feature]=np.where(test_data[feature].isin(temp_df),test_data[feature],'Rare_var')
for feature in categorical_features:
labels_ordered=train_data.groupby([feature])['price_doc'].mean().sort_values().index
labels_ordered={k:i for i,k in enumerate(labels_ordered,0)}
train_data[feature]=train_data[feature].map(labels_ordered)
for feature in categorical_features_test:
labels_ordered=test_data[feature]
labels_ordered={k:i for i,k in enumerate(labels_ordered,0)}
test_data[feature]=test_data[feature].map(labels_ordered)
train_data.head()
| id | full_sq | life_sq | floor | max_floor | material | build_year | num_room | kitch_sq | state | product_type | sub_area | area_m | raion_popul | green_zone_part | indust_part | children_preschool | preschool_quota | preschool_education_centers_raion | children_school | school_quota | school_education_centers_raion | school_education_centers_top_20_raion | hospital_beds_raion | healthcare_centers_raion | university_top_20_raion | sport_objects_raion | additional_education_raion | culture_objects_top_25 | culture_objects_top_25_raion | shopping_centers_raion | office_raion | thermal_power_plant_raion | incineration_raion | oil_chemistry_raion | radiation_raion | railroad_terminal_raion | big_market_raion | nuclear_reactor_raion | detention_facility_raion | full_all | male_f | female_f | young_all | young_male | young_female | work_all | work_male | work_female | ekder_all | ekder_male | ekder_female | 0_6_all | 0_6_male | 0_6_female | 7_14_all | 7_14_male | 7_14_female | 0_17_all | 0_17_male | 0_17_female | 16_29_all | 16_29_male | 16_29_female | 0_13_all | 0_13_male | 0_13_female | raion_build_count_with_material_info | build_count_block | build_count_wood | build_count_frame | build_count_brick | build_count_monolith | build_count_panel | build_count_foam | build_count_slag | build_count_mix | raion_build_count_with_builddate_info | build_count_before_1920 | build_count_1921-1945 | build_count_1946-1970 | build_count_1971-1995 | build_count_after_1995 | ID_metro | metro_min_avto | metro_km_avto | metro_min_walk | metro_km_walk | kindergarten_km | school_km | park_km | green_zone_km | industrial_km | water_treatment_km | cemetery_km | incineration_km | railroad_station_walk_km | railroad_station_walk_min | ID_railroad_station_walk | railroad_station_avto_km | railroad_station_avto_min | ID_railroad_station_avto | public_transport_station_km | public_transport_station_min_walk | water_km | water_1line | mkad_km | ttk_km | sadovoe_km | bulvar_ring_km | kremlin_km | big_road1_km | ID_big_road1 | big_road1_1line | big_road2_km | ID_big_road2 | railroad_km | railroad_1line | zd_vokzaly_avto_km | ID_railroad_terminal | bus_terminal_avto_km | ID_bus_terminal | oil_chemistry_km | nuclear_reactor_km | radiation_km | power_transmission_line_km | thermal_power_plant_km | ts_km | big_market_km | market_shop_km | fitness_km | swim_pool_km | ice_rink_km | stadium_km | basketball_km | hospice_morgue_km | detention_facility_km | public_healthcare_km | university_km | workplaces_km | shopping_centers_km | office_km | additional_education_km | preschool_km | big_church_km | church_synagogue_km | mosque_km | theater_km | museum_km | exhibition_km | ... | leisure_count_1000 | sport_count_1000 | market_count_1000 | green_part_1500 | prom_part_1500 | office_count_1500 | office_sqm_1500 | trc_count_1500 | trc_sqm_1500 | cafe_count_1500 | cafe_sum_1500_min_price_avg | cafe_sum_1500_max_price_avg | cafe_avg_price_1500 | cafe_count_1500_na_price | cafe_count_1500_price_500 | cafe_count_1500_price_1000 | cafe_count_1500_price_1500 | cafe_count_1500_price_2500 | cafe_count_1500_price_4000 | cafe_count_1500_price_high | big_church_count_1500 | church_count_1500 | mosque_count_1500 | leisure_count_1500 | sport_count_1500 | market_count_1500 | green_part_2000 | prom_part_2000 | office_count_2000 | office_sqm_2000 | trc_count_2000 | trc_sqm_2000 | cafe_count_2000 | cafe_sum_2000_min_price_avg | cafe_sum_2000_max_price_avg | cafe_avg_price_2000 | cafe_count_2000_na_price | cafe_count_2000_price_500 | cafe_count_2000_price_1000 | cafe_count_2000_price_1500 | cafe_count_2000_price_2500 | cafe_count_2000_price_4000 | cafe_count_2000_price_high | big_church_count_2000 | church_count_2000 | mosque_count_2000 | leisure_count_2000 | sport_count_2000 | market_count_2000 | green_part_3000 | prom_part_3000 | office_count_3000 | office_sqm_3000 | trc_count_3000 | trc_sqm_3000 | cafe_count_3000 | cafe_sum_3000_min_price_avg | cafe_sum_3000_max_price_avg | cafe_avg_price_3000 | cafe_count_3000_na_price | cafe_count_3000_price_500 | cafe_count_3000_price_1000 | cafe_count_3000_price_1500 | cafe_count_3000_price_2500 | cafe_count_3000_price_4000 | cafe_count_3000_price_high | big_church_count_3000 | church_count_3000 | mosque_count_3000 | leisure_count_3000 | sport_count_3000 | market_count_3000 | green_part_5000 | prom_part_5000 | office_count_5000 | office_sqm_5000 | trc_count_5000 | trc_sqm_5000 | cafe_count_5000 | cafe_sum_5000_min_price_avg | cafe_sum_5000_max_price_avg | cafe_avg_price_5000 | cafe_count_5000_na_price | cafe_count_5000_price_500 | cafe_count_5000_price_1000 | cafe_count_5000_price_1500 | cafe_count_5000_price_2500 | cafe_count_5000_price_4000 | cafe_count_5000_price_high | big_church_count_5000 | church_count_5000 | mosque_count_5000 | leisure_count_5000 | sport_count_5000 | market_count_5000 | price_doc | year | month | day | life_sqnan | floornan | max_floornan | materialnan | build_yearnan | num_roomnan | kitch_sqnan | statenan | preschool_quotanan | school_quotanan | hospital_beds_raionnan | raion_build_count_with_material_infonan | build_count_blocknan | build_count_woodnan | build_count_framenan | build_count_bricknan | build_count_monolithnan | build_count_panelnan | build_count_foamnan | build_count_slagnan | build_count_mixnan | raion_build_count_with_builddate_infonan | build_count_before_1920nan | build_count_1921-1945nan | build_count_1946-1970nan | build_count_1971-1995nan | build_count_after_1995nan | metro_min_walknan | metro_km_walknan | railroad_station_walk_kmnan | railroad_station_walk_minnan | ID_railroad_station_walknan | cafe_sum_500_min_price_avgnan | cafe_sum_500_max_price_avgnan | cafe_avg_price_500nan | cafe_sum_1000_min_price_avgnan | cafe_sum_1000_max_price_avgnan | cafe_avg_price_1000nan | cafe_sum_1500_min_price_avgnan | cafe_sum_1500_max_price_avgnan | cafe_avg_price_1500nan | cafe_sum_2000_min_price_avgnan | cafe_sum_2000_max_price_avgnan | cafe_avg_price_2000nan | cafe_sum_3000_min_price_avgnan | cafe_sum_3000_max_price_avgnan | cafe_avg_price_3000nan | prom_part_5000nan | cafe_sum_5000_min_price_avgnan | cafe_sum_5000_max_price_avgnan | cafe_avg_price_5000nan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 43 | 27.0 | 4.0 | 12.0 | 1.0 | 1979.0 | 2.0 | 6.0 | 2.0 | 1 | 16 | 6.407578e+06 | 155572 | 0.189727 | 0.000070 | 9576 | 5001.0 | 5 | 10309 | 11065.0 | 5 | 0 | 240.0 | 1 | 0 | 7 | 3 | 0 | 0 | 16 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 86206 | 40477 | 45729 | 21154 | 11007 | 10147 | 98207 | 52277 | 45930 | 36211 | 10580 | 25631 | 9576 | 4899 | 4677 | 10309 | 5463 | 4846 | 23603 | 12286 | 11317 | 17508 | 9425 | 8083 | 18654 | 9709 | 8945 | 211.0 | 25.0 | 0.0 | 0.0 | 0.0 | 2.0 | 184.0 | 0.0 | 0.0 | 0.0 | 211.0 | 0.0 | 0.0 | 0.0 | 206.0 | 5.0 | 1 | 2.590241 | 1.131260 | 13.575119 | 1.131260 | 0.145700 | 0.177975 | 2.158587 | 0.600973 | 1.080934 | 23.683460 | 1.804127 | 3.633334 | 5.419893 | 65.038716 | 1.0 | 5.419893 | 6.905893 | 1 | 0.274985 | 3.299822 | 0.992631 | 1 | 1.422391 | 10.918587 | 13.100618 | 13.675657 | 15.156211 | 1.422391 | 1 | 0 | 3.830951 | 5 | 1.305159 | 1 | 14.231961 | 101 | 24.292406 | 1 | 18.152338 | 5.718519 | 1.210027 | 1.062513 | 5.814135 | 4.308127 | 10.814172 | 1.676258 | 0.485841 | 3.065047 | 1.107594 | 8.148591 | 3.516513 | 2.392353 | 4.248036 | 0.974743 | 6.715026 | 0.884350 | 0.648488 | 0.637189 | 0.947962 | 0.177975 | 0.625783 | 0.628187 | 3.932040 | 14.053047 | 7.389498 | 7.023705 | ... | 0 | 6 | 1 | 14.27 | 6.92 | 3 | 39554 | 9 | 171420 | 34 | 566.67 | 969.70 | 768.18 | 1 | 14 | 11 | 6 | 2 | 0 | 0 | 1 | 2 | 0 | 0 | 7 | 1 | 11.77 | 15.97 | 9 | 188854 | 19 | 1244891 | 36 | 614.29 | 1042.86 | 828.57 | 1 | 15 | 11 | 6 | 2 | 1 | 0 | 1 | 2 | 0 | 0 | 10 | 1 | 11.98 | 13.55 | 12 | 251554 | 23 | 1419204 | 68 | 639.68 | 1079.37 | 859.52 | 5 | 21 | 22 | 16 | 3 | 1 | 0 | 2 | 4 | 0 | 0 | 21 | 1 | 13.09 | 13.31 | 29 | 807385 | 52 | 4036616 | 152 | 708.57 | 1185.71 | 947.14 | 12 | 39 | 48 | 40 | 9 | 4 | 0 | 13 | 22 | 1 | 0 | 52 | 4 | 5850000 | 2011 | 8 | 20 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 2 | 34 | 19.0 | 3.0 | 12.0 | 1.0 | 1979.0 | 2.0 | 6.0 | 2.0 | 1 | 17 | 9.589337e+06 | 115352 | 0.372602 | 0.049637 | 6880 | 3119.0 | 5 | 7759 | 6237.0 | 8 | 0 | 229.0 | 1 | 0 | 6 | 1 | 1 | 1 | 3 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 76284 | 34200 | 42084 | 15727 | 7925 | 7802 | 70194 | 35622 | 34572 | 29431 | 9266 | 20165 | 6880 | 3466 | 3414 | 7759 | 3909 | 3850 | 17700 | 8998 | 8702 | 15164 | 7571 | 7593 | 13729 | 6929 | 6800 | 245.0 | 83.0 | 1.0 | 0.0 | 67.0 | 4.0 | 90.0 | 0.0 | 0.0 | 0.0 | 244.0 | 1.0 | 1.0 | 143.0 | 84.0 | 15.0 | 2 | 0.936700 | 0.647337 | 7.620630 | 0.635053 | 0.147754 | 0.273345 | 0.550690 | 0.065321 | 0.966479 | 1.317476 | 4.655004 | 8.648587 | 3.411993 | 40.943917 | 2.0 | 3.641773 | 4.679745 | 2 | 0.065263 | 0.783160 | 0.698081 | 1 | 9.503405 | 3.103996 | 6.444333 | 8.132640 | 8.698054 | 2.887377 | 2 | 0 | 3.103996 | 4 | 0.694536 | 1 | 9.242586 | 32 | 5.706113 | 2 | 9.034642 | 3.489954 | 2.724295 | 1.246149 | 3.419574 | 0.725560 | 6.910568 | 3.424716 | 0.668364 | 2.000154 | 8.972823 | 6.127073 | 1.161579 | 2.543747 | 12.649879 | 1.477723 | 1.852560 | 0.686252 | 0.519311 | 0.688796 | 1.072315 | 0.273345 | 0.967821 | 0.471447 | 4.841544 | 6.829889 | 0.709260 | 2.358840 | ... | 4 | 2 | 0 | 21.53 | 7.71 | 3 | 102910 | 7 | 127065 | 17 | 694.12 | 1205.88 | 950.00 | 0 | 6 | 7 | 1 | 2 | 1 | 0 | 1 | 5 | 0 | 4 | 9 | 0 | 22.37 | 19.25 | 4 | 165510 | 8 | 179065 | 21 | 695.24 | 1190.48 | 942.86 | 0 | 7 | 8 | 3 | 2 | 1 | 0 | 1 | 5 | 0 | 4 | 11 | 0 | 18.07 | 27.32 | 12 | 821986 | 14 | 491565 | 30 | 631.03 | 1086.21 | 858.62 | 1 | 11 | 11 | 4 | 2 | 1 | 0 | 1 | 7 | 0 | 6 | 19 | 1 | 10.26 | 27.47 | 66 | 2690465 | 40 | 2034942 | 177 | 673.81 | 1148.81 | 911.31 | 9 | 49 | 65 | 36 | 15 | 3 | 0 | 15 | 29 | 1 | 10 | 66 | 14 | 6000000 | 2011 | 8 | 23 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 3 | 43 | 29.0 | 2.0 | 12.0 | 1.0 | 1979.0 | 2.0 | 6.0 | 2.0 | 1 | 16 | 4.808270e+06 | 101708 | 0.112560 | 0.118537 | 5879 | 1463.0 | 4 | 6207 | 5580.0 | 7 | 0 | 1183.0 | 1 | 0 | 5 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 101982 | 46076 | 55906 | 13028 | 6835 | 6193 | 63388 | 31813 | 31575 | 25292 | 7609 | 17683 | 5879 | 3095 | 2784 | 6207 | 3269 | 2938 | 14884 | 7821 | 7063 | 19401 | 9045 | 10356 | 11252 | 5916 | 5336 | 330.0 | 59.0 | 0.0 | 0.0 | 206.0 | 4.0 | 60.0 | 0.0 | 1.0 | 0.0 | 330.0 | 1.0 | 0.0 | 246.0 | 63.0 | 20.0 | 3 | 2.120999 | 1.637996 | 17.351515 | 1.445960 | 0.049102 | 0.158072 | 0.374848 | 0.453172 | 0.939275 | 4.912660 | 3.381083 | 11.996480 | 1.277658 | 15.331896 | 3.0 | 1.277658 | 1.701420 | 3 | 0.328756 | 3.945073 | 0.468265 | 1 | 5.604800 | 2.927487 | 6.963403 | 8.054252 | 9.067885 | 0.647250 | 3 | 0 | 2.927487 | 4 | 0.700691 | 1 | 9.540544 | 5 | 6.710302 | 3 | 5.777394 | 7.506612 | 0.772216 | 1.602183 | 3.682455 | 3.562188 | 5.752368 | 1.375443 | 0.733101 | 1.239304 | 1.978517 | 0.767569 | 1.952771 | 0.621357 | 7.682303 | 0.097144 | 0.841254 | 1.510089 | 1.486533 | 1.543049 | 0.391957 | 0.158072 | 3.178751 | 0.755946 | 7.922152 | 4.273200 | 3.156423 | 4.958214 | ... | 0 | 5 | 3 | 9.92 | 6.73 | 0 | 0 | 1 | 2600 | 14 | 516.67 | 916.67 | 716.67 | 2 | 4 | 6 | 2 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 6 | 5 | 12.99 | 12.75 | 4 | 100200 | 7 | 52550 | 24 | 563.64 | 977.27 | 770.45 | 2 | 8 | 9 | 4 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 8 | 5 | 12.14 | 26.46 | 8 | 110856 | 7 | 52550 | 41 | 697.44 | 1192.31 | 944.87 | 2 | 9 | 17 | 9 | 3 | 1 | 0 | 0 | 11 | 0 | 0 | 20 | 6 | 13.69 | 21.58 | 43 | 1478160 | 35 | 1572990 | 122 | 702.68 | 1196.43 | 949.55 | 10 | 29 | 45 | 25 | 10 | 3 | 0 | 11 | 27 | 0 | 4 | 67 | 10 | 5700000 | 2011 | 8 | 27 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 4 | 89 | 50.0 | 9.0 | 12.0 | 1.0 | 1979.0 | 2.0 | 6.0 | 2.0 | 1 | 18 | 1.258354e+07 | 178473 | 0.194703 | 0.069753 | 13087 | 6839.0 | 9 | 13670 | 17063.0 | 10 | 0 | 990.0 | 1 | 0 | 17 | 6 | 0 | 0 | 11 | 4 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 21155 | 9828 | 11327 | 28563 | 14680 | 13883 | 120381 | 60040 | 60341 | 29529 | 9083 | 20446 | 13087 | 6645 | 6442 | 13670 | 7126 | 6544 | 32063 | 16513 | 15550 | 3292 | 1450 | 1842 | 24934 | 12782 | 12152 | 458.0 | 9.0 | 51.0 | 12.0 | 124.0 | 50.0 | 201.0 | 0.0 | 9.0 | 2.0 | 459.0 | 13.0 | 24.0 | 40.0 | 130.0 | 252.0 | 4 | 1.489049 | 0.984537 | 11.565624 | 0.963802 | 0.179441 | 0.236455 | 0.078090 | 0.106125 | 0.451173 | 15.623710 | 2.017080 | 14.317640 | 4.291432 | 51.497190 | 4.0 | 3.816045 | 5.271136 | 4 | 0.131597 | 1.579164 | 1.200336 | 1 | 2.677824 | 14.606501 | 17.457198 | 18.309433 | 19.487005 | 2.677824 | 1 | 0 | 2.780449 | 17 | 1.999265 | 1 | 17.478380 | 83 | 6.734618 | 1 | 27.667863 | 9.522538 | 6.348716 | 1.767612 | 11.178333 | 0.583025 | 27.892717 | 0.811275 | 0.623484 | 1.950317 | 6.483172 | 7.385521 | 4.923843 | 3.549558 | 8.789894 | 2.163735 | 10.903161 | 0.622272 | 0.599914 | 0.934273 | 0.892674 | 0.236455 | 1.031777 | 1.561505 | 15.300449 | 16.990677 | 16.041521 | 5.029696 | ... | 0 | 3 | 1 | 28.38 | 6.57 | 2 | 11000 | 7 | 89492 | 23 | 673.91 | 1130.43 | 902.17 | 0 | 5 | 9 | 8 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | 2 | 32.29 | 5.73 | 2 | 11000 | 7 | 89492 | 25 | 660.00 | 1120.00 | 890.00 | 0 | 5 | 11 | 8 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 13 | 2 | 20.79 | 3.57 | 4 | 167000 | 12 | 205756 | 32 | 718.75 | 1218.75 | 968.75 | 0 | 5 | 14 | 10 | 3 | 0 | 0 | 1 | 2 | 0 | 0 | 18 | 3 | 14.18 | 3.89 | 8 | 244166 | 22 | 942180 | 61 | 931.58 | 1552.63 | 1242.11 | 4 | 7 | 21 | 15 | 11 | 2 | 1 | 4 | 4 | 0 | 0 | 26 | 3 | 13100000 | 2011 | 9 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 5 | 77 | 77.0 | 4.0 | 12.0 | 1.0 | 1979.0 | 2.0 | 6.0 | 2.0 | 1 | 16 | 8.398461e+06 | 108171 | 0.015234 | 0.037316 | 5706 | 3240.0 | 7 | 6748 | 7770.0 | 9 | 0 | 562.0 | 4 | 2 | 25 | 2 | 0 | 0 | 10 | 93 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 28179 | 13522 | 14657 | 13368 | 7159 | 6209 | 68043 | 34236 | 33807 | 26760 | 8563 | 18197 | 5706 | 2982 | 2724 | 6748 | 3664 | 3084 | 15237 | 8113 | 7124 | 5164 | 2583 | 2581 | 11631 | 6223 | 5408 | 746.0 | 48.0 | 0.0 | 0.0 | 643.0 | 16.0 | 35.0 | 0.0 | 3.0 | 1.0 | 746.0 | 371.0 | 114.0 | 146.0 | 62.0 | 53.0 | 5 | 1.257186 | 0.876620 | 8.266305 | 0.688859 | 0.247901 | 0.376838 | 0.258289 | 0.236214 | 0.392871 | 10.683540 | 2.936581 | 11.903910 | 0.853960 | 10.247521 | 5.0 | 1.595898 | 2.156284 | 113 | 0.071480 | 0.857764 | 0.820294 | 1 | 11.616653 | 1.721834 | 0.046810 | 0.787593 | 2.578671 | 1.721834 | 4 | 0 | 3.133531 | 10 | 0.084113 | 0 | 1.595898 | 113 | 1.423428 | 4 | 6.515857 | 8.671016 | 1.638318 | 3.632640 | 4.587917 | 2.609420 | 9.155057 | 1.969738 | 0.220288 | 2.544696 | 3.975401 | 3.610754 | 0.307915 | 1.864637 | 3.779781 | 1.121703 | 0.991683 | 0.892668 | 0.429052 | 0.077901 | 0.810801 | 0.376838 | 0.378756 | 0.121681 | 2.584370 | 1.112486 | 1.800125 | 1.339652 | ... | 6 | 7 | 0 | 4.12 | 4.83 | 93 | 1195735 | 9 | 445900 | 272 | 766.80 | 1272.73 | 1019.76 | 19 | 70 | 74 | 72 | 30 | 6 | 1 | 18 | 30 | 0 | 10 | 14 | 2 | 4.53 | 5.02 | 149 | 1625130 | 17 | 564843 | 483 | 765.93 | 1269.23 | 1017.58 | 28 | 130 | 129 | 131 | 50 | 14 | 1 | 35 | 61 | 0 | 17 | 21 | 3 | 5.06 | 8.62 | 305 | 3420907 | 60 | 2296870 | 1068 | 853.03 | 1410.45 | 1131.74 | 63 | 266 | 267 | 262 | 149 | 57 | 4 | 70 | 121 | 1 | 40 | 77 | 5 | 8.38 | 10.92 | 689 | 8404624 | 114 | 3503058 | 2283 | 853.88 | 1411.45 | 1132.66 | 143 | 566 | 578 | 552 | 319 | 108 | 17 | 135 | 236 | 2 | 91 | 195 | 14 | 16331452 | 2011 | 9 | 5 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 345 columns
test_data.head()
| id | full_sq | life_sq | floor | max_floor | material | build_year | num_room | kitch_sq | state | product_type | sub_area | area_m | raion_popul | green_zone_part | indust_part | children_preschool | preschool_quota | preschool_education_centers_raion | children_school | school_quota | school_education_centers_raion | school_education_centers_top_20_raion | hospital_beds_raion | healthcare_centers_raion | university_top_20_raion | sport_objects_raion | additional_education_raion | culture_objects_top_25 | culture_objects_top_25_raion | shopping_centers_raion | office_raion | thermal_power_plant_raion | incineration_raion | oil_chemistry_raion | radiation_raion | railroad_terminal_raion | big_market_raion | nuclear_reactor_raion | detention_facility_raion | full_all | male_f | female_f | young_all | young_male | young_female | work_all | work_male | work_female | ekder_all | ekder_male | ekder_female | 0_6_all | 0_6_male | 0_6_female | 7_14_all | 7_14_male | 7_14_female | 0_17_all | 0_17_male | 0_17_female | 16_29_all | 16_29_male | 16_29_female | 0_13_all | 0_13_male | 0_13_female | raion_build_count_with_material_info | build_count_block | build_count_wood | build_count_frame | build_count_brick | build_count_monolith | build_count_panel | build_count_foam | build_count_slag | build_count_mix | raion_build_count_with_builddate_info | build_count_before_1920 | build_count_1921-1945 | build_count_1946-1970 | build_count_1971-1995 | build_count_after_1995 | ID_metro | metro_min_avto | metro_km_avto | metro_min_walk | metro_km_walk | kindergarten_km | school_km | park_km | green_zone_km | industrial_km | water_treatment_km | cemetery_km | incineration_km | railroad_station_walk_km | railroad_station_walk_min | ID_railroad_station_walk | railroad_station_avto_km | railroad_station_avto_min | ID_railroad_station_avto | public_transport_station_km | public_transport_station_min_walk | water_km | water_1line | mkad_km | ttk_km | sadovoe_km | bulvar_ring_km | kremlin_km | big_road1_km | ID_big_road1 | big_road1_1line | big_road2_km | ID_big_road2 | railroad_km | railroad_1line | zd_vokzaly_avto_km | ID_railroad_terminal | bus_terminal_avto_km | ID_bus_terminal | oil_chemistry_km | nuclear_reactor_km | radiation_km | power_transmission_line_km | thermal_power_plant_km | ts_km | big_market_km | market_shop_km | fitness_km | swim_pool_km | ice_rink_km | stadium_km | basketball_km | hospice_morgue_km | detention_facility_km | public_healthcare_km | university_km | workplaces_km | shopping_centers_km | office_km | additional_education_km | preschool_km | big_church_km | church_synagogue_km | mosque_km | theater_km | museum_km | exhibition_km | ... | cafe_count_1000_price_4000 | cafe_count_1000_price_high | big_church_count_1000 | church_count_1000 | mosque_count_1000 | leisure_count_1000 | sport_count_1000 | market_count_1000 | green_part_1500 | prom_part_1500 | office_count_1500 | office_sqm_1500 | trc_count_1500 | trc_sqm_1500 | cafe_count_1500 | cafe_sum_1500_min_price_avg | cafe_sum_1500_max_price_avg | cafe_avg_price_1500 | cafe_count_1500_na_price | cafe_count_1500_price_500 | cafe_count_1500_price_1000 | cafe_count_1500_price_1500 | cafe_count_1500_price_2500 | cafe_count_1500_price_4000 | cafe_count_1500_price_high | big_church_count_1500 | church_count_1500 | mosque_count_1500 | leisure_count_1500 | sport_count_1500 | market_count_1500 | green_part_2000 | prom_part_2000 | office_count_2000 | office_sqm_2000 | trc_count_2000 | trc_sqm_2000 | cafe_count_2000 | cafe_sum_2000_min_price_avg | cafe_sum_2000_max_price_avg | cafe_avg_price_2000 | cafe_count_2000_na_price | cafe_count_2000_price_500 | cafe_count_2000_price_1000 | cafe_count_2000_price_1500 | cafe_count_2000_price_2500 | cafe_count_2000_price_4000 | cafe_count_2000_price_high | big_church_count_2000 | church_count_2000 | mosque_count_2000 | leisure_count_2000 | sport_count_2000 | market_count_2000 | green_part_3000 | prom_part_3000 | office_count_3000 | office_sqm_3000 | trc_count_3000 | trc_sqm_3000 | cafe_count_3000 | cafe_sum_3000_min_price_avg | cafe_sum_3000_max_price_avg | cafe_avg_price_3000 | cafe_count_3000_na_price | cafe_count_3000_price_500 | cafe_count_3000_price_1000 | cafe_count_3000_price_1500 | cafe_count_3000_price_2500 | cafe_count_3000_price_4000 | cafe_count_3000_price_high | big_church_count_3000 | church_count_3000 | mosque_count_3000 | leisure_count_3000 | sport_count_3000 | market_count_3000 | green_part_5000 | prom_part_5000 | office_count_5000 | office_sqm_5000 | trc_count_5000 | trc_sqm_5000 | cafe_count_5000 | cafe_sum_5000_min_price_avg | cafe_sum_5000_max_price_avg | cafe_avg_price_5000 | cafe_count_5000_na_price | cafe_count_5000_price_500 | cafe_count_5000_price_1000 | cafe_count_5000_price_1500 | cafe_count_5000_price_2500 | cafe_count_5000_price_4000 | cafe_count_5000_price_high | big_church_count_5000 | church_count_5000 | mosque_count_5000 | leisure_count_5000 | sport_count_5000 | market_count_5000 | year | month | day | life_sqnan | build_yearnan | statenan | preschool_quotanan | school_quotanan | hospital_beds_raionnan | raion_build_count_with_material_infonan | build_count_blocknan | build_count_woodnan | build_count_framenan | build_count_bricknan | build_count_monolithnan | build_count_panelnan | build_count_foamnan | build_count_slagnan | build_count_mixnan | raion_build_count_with_builddate_infonan | build_count_before_1920nan | build_count_1921-1945nan | build_count_1946-1970nan | build_count_1971-1995nan | build_count_after_1995nan | metro_min_walknan | metro_km_walknan | railroad_station_walk_kmnan | railroad_station_walk_minnan | ID_railroad_station_walknan | cafe_sum_500_min_price_avgnan | cafe_sum_500_max_price_avgnan | cafe_avg_price_500nan | cafe_sum_1000_min_price_avgnan | cafe_sum_1000_max_price_avgnan | cafe_avg_price_1000nan | cafe_sum_1500_min_price_avgnan | cafe_sum_1500_max_price_avgnan | cafe_avg_price_1500nan | green_part_2000nan | cafe_sum_2000_min_price_avgnan | cafe_sum_2000_max_price_avgnan | cafe_avg_price_2000nan | cafe_sum_3000_min_price_avgnan | cafe_sum_3000_max_price_avgnan | cafe_avg_price_3000nan | prom_part_5000nan | cafe_sum_5000_min_price_avgnan | cafe_sum_5000_max_price_avgnan | cafe_avg_price_5000nan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 30474 | 39.0 | 20.7 | 2 | 9 | 1 | 1998.0 | 1 | 8.9 | 3.0 | 7661 | 7661 | 2.615514e+07 | 178264 | 0.137846 | 0.041116 | 14080 | 11926.0 | 11 | 14892 | 24750.0 | 13 | 1 | 990.0 | 1 | 0 | 13 | 4 | 7661 | 0 | 4 | 4 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 102618 | 47681 | 54937 | 30808 | 16251 | 14557 | 121369 | 59138 | 62231 | 26087 | 7410 | 18677 | 14080 | 7457 | 6623 | 14892 | 7839 | 7053 | 34341 | 18094 | 16247 | 19906 | 9676 | 10230 | 27123 | 14340 | 12783 | 1681.0 | 173.0 | 607.0 | 19.0 | 245.0 | 116.0 | 431.0 | 1.0 | 84.0 | 5.0 | 1680.0 | 34.0 | 299.0 | 439.0 | 109.0 | 799.0 | 45 | 1.258957 | 0.735908 | 8.830901 | 0.735908 | 0.078502 | 0.746962 | 2.048487 | 0.061485 | 1.205404 | 0.967696 | 0.781053 | 10.56540 | 4.812102 | 57.745220 | 39.0 | 4.850748 | 6.274963 | 39 | 0.114134 | 1.369603 | 0.248151 | 7661 | 6.374826 | 19.651101 | 22.790985 | 24.079707 | 24.779082 | 4.152246 | 2 | 7661 | 5.706484 | 38 | 0.490549 | 7661 | 27.553486 | 32 | 8.424959 | 9 | 22.624362 | 16.224083 | 6.620081 | 4.121874 | 8.957780 | 8.824060 | 15.483912 | 5.353674 | 0.225788 | 3.673942 | 11.810839 | 20.392427 | 9.131977 | 3.300120 | 25.462741 | 1.613152 | 17.214870 | 7.922610 | 2.414138 | 4.923614 | 0.514211 | 0.746962 | 0.749142 | 0.848297 | 1.917736 | 19.953413 | 14.052207 | 12.228576 | ... | 0 | 0 | 1 | 1 | 0 | 0 | 3 | 0 | 20.14 | 0.70 | 0 | 0 | 0 | 0 | 2 | 1000.00 | 1500.00 | 1250.00 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 4 | 0 | 15.17 | 1.18 | 0 | 0 | 0 | 0 | 3 | 1000.00 | 1500.00 | 1250.00 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 1 | 2 | 1 | 0 | 5 | 0 | 14.69 | 2.87 | 0 | 0 | 3 | 73000 | 12 | 781.82 | 1227.27 | 1004.55 | 1 | 2 | 2 | 7 | 0 | 0 | 0 | 1 | 3 | 1 | 0 | 7 | 0 | 21.58 | 4.69 | 1 | 37550 | 8 | 299166 | 19 | 676.47 | 1088.24 | 882.35 | 2 | 5 | 4 | 8 | 0 | 0 | 0 | 1 | 10 | 1 | 0 | 14 | 1 | 2015 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 30475 | 79.2 | 30.4 | 8 | 17 | 1 | 0.0 | 3 | 1.0 | 1.0 | 7661 | 7661 | 2.553630e+07 | 4001 | 0.496315 | 0.007122 | 275 | 3062.0 | 0 | 264 | 6974.0 | 0 | 0 | 990.0 | 0 | 0 | 0 | 0 | 7661 | 0 | 1 | 0 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 17790 | 8350 | 9443 | 574 | 297 | 277 | 2566 | 1356 | 1211 | 861 | 244 | 617 | 275 | 143 | 133 | 264 | 136 | 128 | 646 | 336 | 311 | 3796 | 2035 | 1762 | 506 | 261 | 245 | 295.0 | 44.0 | 1.0 | 0.0 | 84.0 | 7.0 | 92.0 | 0.0 | 0.0 | 0.0 | 295.0 | 0.0 | 2.0 | 144.0 | 73.0 | 31.0 | 21 | 4.230425 | 3.444625 | 41.335498 | 3.444625 | 1.192193 | 1.332570 | 4.400427 | 0.000000 | 0.742377 | 16.049420 | 2.244906 | 18.50054 | 5.458057 | 65.496687 | 24.0 | 5.458057 | 6.859956 | 24 | 0.826083 | 9.912993 | 0.799853 | 7661 | 6.847813 | 16.975793 | 19.692960 | 20.864427 | 21.722620 | 2.148398 | 13 | 7661 | 4.410488 | 27 | 2.342346 | 7661 | 27.421853 | 50 | 16.913175 | 8 | 29.425443 | 17.080113 | 8.545593 | 4.932827 | 10.039833 | 3.654955 | 15.092542 | 8.156185 | 1.313180 | 4.244082 | 4.438242 | 13.445121 | 8.332180 | 7.095895 | 26.807204 | 3.775300 | 12.440198 | 9.672779 | 1.764298 | 3.764819 | 1.694967 | 1.332570 | 1.672126 | 1.162371 | 12.239901 | 13.006107 | 9.661063 | 4.323941 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 59.28 | 5.31 | 0 | 0 | 0 | 0 | 3 | 833.33 | 1500.00 | 1166.67 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 49.26 | 4.06 | 0 | 0 | 1 | 5000 | 7 | 757.14 | 1285.71 | 1021.43 | 0 | 1 | 3 | 2 | 1 | 0 | 0 | 1 | 2 | 0 | 0 | 1 | 0 | 39.50 | 3.32 | 0 | 0 | 2 | 22000 | 10 | 680.00 | 1200.00 | 940.00 | 0 | 1 | 6 | 2 | 1 | 0 | 0 | 1 | 5 | 0 | 0 | 7 | 0 | 39.10 | 7.70 | 2 | 177300 | 6 | 231300 | 20 | 733.33 | 1250.00 | 991.67 | 2 | 4 | 8 | 4 | 1 | 1 | 0 | 2 | 11 | 0 | 1 | 12 | 1 | 2015 | 7 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 30476 | 40.5 | 25.1 | 3 | 5 | 2 | 1960.0 | 2 | 4.8 | 2.0 | 7661 | 7661 | 9.946335e+06 | 139322 | 0.065409 | 0.225825 | 6400 | 2232.0 | 7 | 6558 | 7966.0 | 7 | 0 | 1548.0 | 3 | 0 | 13 | 0 | 7661 | 0 | 2 | 7 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 36154 | 16222 | 19932 | 13799 | 6937 | 6862 | 91795 | 44734 | 47061 | 33728 | 9653 | 24075 | 6400 | 3209 | 3191 | 6558 | 3317 | 3241 | 15514 | 7813 | 7701 | 8137 | 3787 | 4350 | 12162 | 6117 | 6045 | 561.0 | 111.0 | 0.0 | 0.0 | 254.0 | 3.0 | 189.0 | 0.0 | 4.0 | 0.0 | 561.0 | 0.0 | 5.0 | 437.0 | 79.0 | 40.0 | 44 | 1.585306 | 1.122214 | 13.466563 | 1.122214 | 0.065324 | 0.194608 | 2.513006 | 0.580638 | 0.900408 | 11.749900 | 3.389848 | 10.19563 | 3.628293 | 43.539514 | 68.0 | 3.977659 | 5.375048 | 59 | 0.116686 | 1.400229 | 1.384824 | 7661 | 3.499380 | 5.627481 | 8.090528 | 8.671086 | 10.320728 | 0.580638 | 10 | 7661 | 3.499380 | 1 | 2.220941 | 7661 | 10.093318 | 5 | 7.921607 | 3 | 1.823381 | 14.431252 | 0.826743 | 2.388288 | 3.760642 | 3.290966 | 16.304596 | 0.644830 | 0.966254 | 1.332737 | 3.131143 | 1.464174 | 1.499581 | 0.487817 | 6.718082 | 0.711768 | 4.862872 | 3.506298 | 1.456661 | 1.223804 | 2.330995 | 0.194608 | 1.400094 | 1.177527 | 9.938735 | 2.983875 | 1.988346 | 0.794245 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 6 | 2 | 30.97 | 8.75 | 2 | 34100 | 1 | 0 | 19 | 655.56 | 1111.11 | 883.33 | 1 | 6 | 6 | 4 | 2 | 0 | 0 | 1 | 1 | 0 | 1 | 12 | 3 | 40.90 | 10.51 | 6 | 80237 | 3 | 14090 | 28 | 633.33 | 1092.59 | 862.96 | 1 | 7 | 12 | 6 | 2 | 0 | 0 | 2 | 2 | 0 | 4 | 14 | 4 | 45.86 | 9.08 | 8 | 215237 | 6 | 39106 | 37 | 608.33 | 1069.44 | 838.89 | 1 | 8 | 19 | 7 | 2 | 0 | 0 | 2 | 3 | 0 | 5 | 22 | 4 | 25.62 | 13.59 | 27 | 427889 | 26 | 1024431 | 179 | 668.97 | 1132.18 | 900.57 | 5 | 53 | 64 | 42 | 11 | 4 | 0 | 10 | 21 | 0 | 10 | 71 | 11 | 2015 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 30477 | 62.8 | 36.0 | 17 | 17 | 1 | 2016.0 | 2 | 62.8 | 3.0 | 7661 | 7661 | 2.149409e+07 | 7122 | 0.262459 | 0.017647 | 489 | 3062.0 | 0 | 469 | 6974.0 | 0 | 0 | 990.0 | 0 | 0 | 0 | 2 | 7661 | 0 | 0 | 0 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 9553 | 4529 | 5024 | 1021 | 529 | 493 | 4568 | 2414 | 2155 | 1533 | 435 | 1099 | 489 | 254 | 236 | 469 | 242 | 228 | 1150 | 597 | 553 | 2155 | 1206 | 950 | 900 | 465 | 435 | 295.0 | 44.0 | 1.0 | 0.0 | 84.0 | 7.0 | 92.0 | 0.0 | 0.0 | 0.0 | 295.0 | 0.0 | 2.0 | 144.0 | 73.0 | 31.0 | 45 | 7.931398 | 6.038848 | 68.559794 | 5.713316 | 3.189083 | 3.540105 | 5.612835 | 0.025446 | 0.466738 | 5.061917 | 2.701804 | 14.62944 | 10.284167 | 123.410001 | 39.0 | 10.609698 | 13.517419 | 39 | 3.093209 | 37.118504 | 0.233017 | 7661 | 8.928836 | 22.094252 | 25.062928 | 26.226045 | 26.960463 | 2.722667 | 38 | 7661 | 8.601110 | 2 | 4.476081 | 7661 | 37.436772 | 50 | 13.979650 | 8 | 26.895118 | 19.942295 | 9.434351 | 6.218331 | 13.345715 | 10.480798 | 10.723870 | 11.112624 | 4.480234 | 8.577223 | 15.200509 | 18.560234 | 12.253021 | 6.831966 | 30.366022 | 5.731266 | 15.382678 | 11.306566 | 6.589381 | 8.102094 | 0.403429 | 3.540105 | 5.411312 | 0.213853 | 6.153091 | 18.121220 | 16.938290 | 14.171229 | ... | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 28.82 | 3.59 | 0 | 0 | 0 | 0 | 1 | 300.00 | 500.00 | 400.00 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 31.35 | 2.99 | 0 | 0 | 0 | 0 | 1 | 300.00 | 500.00 | 400.00 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 34.87 | 1.34 | 0 | 0 | 0 | 0 | 1 | 300.00 | 500.00 | 400.00 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 24.25 | 1.66 | 0 | 0 | 0 | 0 | 5 | 1560.00 | 2500.00 | 2030.00 | 0 | 1 | 0 | 1 | 1 | 2 | 0 | 0 | 10 | 0 | 0 | 2 | 0 | 2015 | 7 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 30478 | 40.0 | 40.0 | 17 | 17 | 1 | 0.0 | 1 | 1.0 | 1.0 | 7661 | 7661 | 2.553630e+07 | 4001 | 0.496315 | 0.007122 | 275 | 3062.0 | 0 | 264 | 6974.0 | 0 | 0 | 990.0 | 0 | 0 | 0 | 0 | 7661 | 0 | 1 | 0 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 7661 | 17790 | 8350 | 9443 | 574 | 297 | 277 | 2566 | 1356 | 1211 | 861 | 244 | 617 | 275 | 143 | 133 | 264 | 136 | 128 | 646 | 336 | 311 | 3796 | 2035 | 1762 | 506 | 261 | 245 | 295.0 | 44.0 | 1.0 | 0.0 | 84.0 | 7.0 | 92.0 | 0.0 | 0.0 | 0.0 | 295.0 | 0.0 | 2.0 | 144.0 | 73.0 | 31.0 | 21 | 2.152792 | 1.722233 | 20.666800 | 1.722233 | 0.897889 | 1.234235 | 4.566595 | 0.427248 | 0.353642 | 16.784630 | 2.250137 | 19.14919 | 3.735666 | 44.827989 | 24.0 | 3.735666 | 4.782323 | 24 | 0.630014 | 7.560163 | 0.394422 | 7661 | 7.123215 | 17.148737 | 19.868997 | 21.038561 | 21.905792 | 2.808077 | 13 | 7661 | 3.688405 | 27 | 1.727223 | 7661 | 25.699461 | 50 | 17.366661 | 8 | 29.968660 | 17.397666 | 9.036942 | 5.506770 | 10.102328 | 3.729416 | 15.546028 | 6.433794 | 1.519553 | 2.521691 | 2.715850 | 13.898607 | 8.355285 | 7.401423 | 25.084813 | 2.052908 | 12.893684 | 9.479093 | 1.806570 | 4.338453 | 1.339078 | 1.234235 | 1.192543 | 1.186621 | 12.652956 | 13.459593 | 9.890758 | 4.555385 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 43.85 | 1.55 | 0 | 0 | 0 | 0 | 3 | 833.33 | 1500.00 | 1166.67 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 38.61 | 3.12 | 0 | 0 | 2 | 22000 | 7 | 757.14 | 1285.71 | 1021.43 | 0 | 1 | 3 | 2 | 1 | 0 | 0 | 1 | 2 | 0 | 0 | 3 | 0 | 41.64 | 2.11 | 0 | 0 | 2 | 22000 | 9 | 700.00 | 1222.22 | 961.11 | 0 | 1 | 5 | 2 | 1 | 0 | 0 | 1 | 4 | 0 | 0 | 6 | 0 | 35.62 | 6.96 | 1 | 117300 | 4 | 201300 | 20 | 747.37 | 1263.16 | 1005.26 | 1 | 4 | 8 | 5 | 1 | 1 | 0 | 2 | 12 | 0 | 1 | 11 | 1 | 2015 | 7 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 340 columns
continuous_feature
['full_sq', 'life_sq', 'floor', 'max_floor', 'build_year', 'kitch_sq', 'area_m', 'raion_popul', 'green_zone_part', 'indust_part', 'children_preschool', 'preschool_quota', 'children_school', 'school_quota', 'hospital_beds_raion', 'office_raion', 'full_all', 'male_f', 'female_f', 'young_all', 'young_male', 'young_female', 'work_all', 'work_male', 'work_female', 'ekder_all', 'ekder_male', 'ekder_female', '0_6_all', '0_6_male', '0_6_female', '7_14_all', '7_14_male', '7_14_female', '0_17_all', '0_17_male', '0_17_female', '16_29_all', '16_29_male', '16_29_female', '0_13_all', '0_13_male', '0_13_female', 'raion_build_count_with_material_info', 'build_count_block', 'build_count_wood', 'build_count_brick', 'build_count_monolith', 'build_count_panel', 'build_count_before_1920', 'build_count_1921-1945', 'build_count_1946-1970', 'build_count_1971-1995', 'build_count_after_1995', 'ID_metro', 'metro_min_avto', 'metro_km_avto', 'metro_min_walk', 'metro_km_walk', 'kindergarten_km', 'school_km', 'park_km', 'green_zone_km', 'industrial_km', 'water_treatment_km', 'cemetery_km', 'incineration_km', 'railroad_station_walk_km', 'railroad_station_walk_min', 'ID_railroad_station_walk', 'railroad_station_avto_km', 'railroad_station_avto_min', 'ID_railroad_station_avto', 'public_transport_station_km', 'public_transport_station_min_walk', 'water_km', 'mkad_km', 'ttk_km', 'sadovoe_km', 'bulvar_ring_km', 'kremlin_km', 'big_road1_km', 'ID_big_road1', 'big_road2_km', 'ID_big_road2', 'railroad_km', 'zd_vokzaly_avto_km', 'bus_terminal_avto_km', 'oil_chemistry_km', 'nuclear_reactor_km', 'radiation_km', 'power_transmission_line_km', 'thermal_power_plant_km', 'ts_km', 'big_market_km', 'market_shop_km', 'fitness_km', 'swim_pool_km', 'ice_rink_km', 'stadium_km', 'basketball_km', 'hospice_morgue_km', 'detention_facility_km', 'public_healthcare_km', 'university_km', 'workplaces_km', 'shopping_centers_km', 'office_km', 'additional_education_km', 'preschool_km', 'big_church_km', 'church_synagogue_km', 'mosque_km', 'theater_km', 'museum_km', 'exhibition_km', 'catering_km', 'green_part_500', 'prom_part_500', 'office_count_500', 'office_sqm_500', 'trc_sqm_500', 'cafe_count_500', 'cafe_sum_500_min_price_avg', 'cafe_sum_500_max_price_avg', 'cafe_avg_price_500', 'cafe_count_500_price_500', 'cafe_count_500_price_1000', 'cafe_count_500_price_1500', 'green_part_1000', 'prom_part_1000', 'office_count_1000', 'office_sqm_1000', 'trc_sqm_1000', 'cafe_count_1000', 'cafe_sum_1000_min_price_avg', 'cafe_sum_1000_max_price_avg', 'cafe_avg_price_1000', 'cafe_count_1000_na_price', 'cafe_count_1000_price_500', 'cafe_count_1000_price_1000', 'cafe_count_1000_price_1500', 'cafe_count_1000_price_2500', 'cafe_count_1000_price_4000', 'church_count_1000', 'leisure_count_1000', 'green_part_1500', 'prom_part_1500', 'office_count_1500', 'office_sqm_1500', 'trc_count_1500', 'trc_sqm_1500', 'cafe_count_1500', 'cafe_sum_1500_min_price_avg', 'cafe_sum_1500_max_price_avg', 'cafe_avg_price_1500', 'cafe_count_1500_na_price', 'cafe_count_1500_price_500', 'cafe_count_1500_price_1000', 'cafe_count_1500_price_1500', 'cafe_count_1500_price_2500', 'cafe_count_1500_price_4000', 'big_church_count_1500', 'church_count_1500', 'leisure_count_1500', 'sport_count_1500', 'green_part_2000', 'prom_part_2000', 'office_count_2000', 'office_sqm_2000', 'trc_count_2000', 'trc_sqm_2000', 'cafe_count_2000', 'cafe_sum_2000_min_price_avg', 'cafe_sum_2000_max_price_avg', 'cafe_avg_price_2000', 'cafe_count_2000_na_price', 'cafe_count_2000_price_500', 'cafe_count_2000_price_1000', 'cafe_count_2000_price_1500', 'cafe_count_2000_price_2500', 'cafe_count_2000_price_4000', 'big_church_count_2000', 'church_count_2000', 'leisure_count_2000', 'sport_count_2000', 'green_part_3000', 'prom_part_3000', 'office_count_3000', 'office_sqm_3000', 'trc_count_3000', 'trc_sqm_3000', 'cafe_count_3000', 'cafe_sum_3000_min_price_avg', 'cafe_sum_3000_max_price_avg', 'cafe_avg_price_3000', 'cafe_count_3000_na_price', 'cafe_count_3000_price_500', 'cafe_count_3000_price_1000', 'cafe_count_3000_price_1500', 'cafe_count_3000_price_2500', 'cafe_count_3000_price_4000', 'big_church_count_3000', 'church_count_3000', 'leisure_count_3000', 'sport_count_3000', 'green_part_5000', 'prom_part_5000', 'office_count_5000', 'office_sqm_5000', 'trc_count_5000', 'trc_sqm_5000', 'cafe_count_5000', 'cafe_sum_5000_min_price_avg', 'cafe_sum_5000_max_price_avg', 'cafe_avg_price_5000', 'cafe_count_5000_na_price', 'cafe_count_5000_price_500', 'cafe_count_5000_price_1000', 'cafe_count_5000_price_1500', 'cafe_count_5000_price_2500', 'cafe_count_5000_price_4000', 'cafe_count_5000_price_high', 'big_church_count_5000', 'church_count_5000', 'leisure_count_5000', 'sport_count_5000', 'price_doc']
import numpy as np
num_features=continuous_feature
for feature in num_features:
train_data[feature]=np.log(train_data[feature])
for feature in continuous_feature_test:
data=test_data.copy()
if 0 in data[feature].unique():
pass
else:
data[feature]=np.log(data[feature])
#data['SalePrice']=np.log(data['SalePrice'])
#plt.scatter(data[feature],data['SalePrice'])
plt.xlabel(feature)
plt.ylabel('SalesPrice')
plt.title(feature)
plt.show()
import numpy as np
num_features=continuous_feature_test
for feature in num_features:
train_data[feature]=np.log(train_data[feature])
train_data.head()
| id | full_sq | life_sq | floor | max_floor | material | build_year | num_room | kitch_sq | state | product_type | sub_area | area_m | raion_popul | green_zone_part | indust_part | children_preschool | preschool_quota | preschool_education_centers_raion | children_school | school_quota | school_education_centers_raion | school_education_centers_top_20_raion | hospital_beds_raion | healthcare_centers_raion | university_top_20_raion | sport_objects_raion | additional_education_raion | culture_objects_top_25 | culture_objects_top_25_raion | shopping_centers_raion | office_raion | thermal_power_plant_raion | incineration_raion | oil_chemistry_raion | radiation_raion | railroad_terminal_raion | big_market_raion | nuclear_reactor_raion | detention_facility_raion | full_all | male_f | female_f | young_all | young_male | young_female | work_all | work_male | work_female | ekder_all | ekder_male | ekder_female | 0_6_all | 0_6_male | 0_6_female | 7_14_all | 7_14_male | 7_14_female | 0_17_all | 0_17_male | 0_17_female | 16_29_all | 16_29_male | 16_29_female | 0_13_all | 0_13_male | 0_13_female | raion_build_count_with_material_info | build_count_block | build_count_wood | build_count_frame | build_count_brick | build_count_monolith | build_count_panel | build_count_foam | build_count_slag | build_count_mix | raion_build_count_with_builddate_info | build_count_before_1920 | build_count_1921-1945 | build_count_1946-1970 | build_count_1971-1995 | build_count_after_1995 | ID_metro | metro_min_avto | metro_km_avto | metro_min_walk | metro_km_walk | kindergarten_km | school_km | park_km | green_zone_km | industrial_km | water_treatment_km | cemetery_km | incineration_km | railroad_station_walk_km | railroad_station_walk_min | ID_railroad_station_walk | railroad_station_avto_km | railroad_station_avto_min | ID_railroad_station_avto | public_transport_station_km | public_transport_station_min_walk | water_km | water_1line | mkad_km | ttk_km | sadovoe_km | bulvar_ring_km | kremlin_km | big_road1_km | ID_big_road1 | big_road1_1line | big_road2_km | ID_big_road2 | railroad_km | railroad_1line | zd_vokzaly_avto_km | ID_railroad_terminal | bus_terminal_avto_km | ID_bus_terminal | oil_chemistry_km | nuclear_reactor_km | radiation_km | power_transmission_line_km | thermal_power_plant_km | ts_km | big_market_km | market_shop_km | fitness_km | swim_pool_km | ice_rink_km | stadium_km | basketball_km | hospice_morgue_km | detention_facility_km | public_healthcare_km | university_km | workplaces_km | shopping_centers_km | office_km | additional_education_km | preschool_km | big_church_km | church_synagogue_km | mosque_km | theater_km | museum_km | exhibition_km | ... | leisure_count_1000 | sport_count_1000 | market_count_1000 | green_part_1500 | prom_part_1500 | office_count_1500 | office_sqm_1500 | trc_count_1500 | trc_sqm_1500 | cafe_count_1500 | cafe_sum_1500_min_price_avg | cafe_sum_1500_max_price_avg | cafe_avg_price_1500 | cafe_count_1500_na_price | cafe_count_1500_price_500 | cafe_count_1500_price_1000 | cafe_count_1500_price_1500 | cafe_count_1500_price_2500 | cafe_count_1500_price_4000 | cafe_count_1500_price_high | big_church_count_1500 | church_count_1500 | mosque_count_1500 | leisure_count_1500 | sport_count_1500 | market_count_1500 | green_part_2000 | prom_part_2000 | office_count_2000 | office_sqm_2000 | trc_count_2000 | trc_sqm_2000 | cafe_count_2000 | cafe_sum_2000_min_price_avg | cafe_sum_2000_max_price_avg | cafe_avg_price_2000 | cafe_count_2000_na_price | cafe_count_2000_price_500 | cafe_count_2000_price_1000 | cafe_count_2000_price_1500 | cafe_count_2000_price_2500 | cafe_count_2000_price_4000 | cafe_count_2000_price_high | big_church_count_2000 | church_count_2000 | mosque_count_2000 | leisure_count_2000 | sport_count_2000 | market_count_2000 | green_part_3000 | prom_part_3000 | office_count_3000 | office_sqm_3000 | trc_count_3000 | trc_sqm_3000 | cafe_count_3000 | cafe_sum_3000_min_price_avg | cafe_sum_3000_max_price_avg | cafe_avg_price_3000 | cafe_count_3000_na_price | cafe_count_3000_price_500 | cafe_count_3000_price_1000 | cafe_count_3000_price_1500 | cafe_count_3000_price_2500 | cafe_count_3000_price_4000 | cafe_count_3000_price_high | big_church_count_3000 | church_count_3000 | mosque_count_3000 | leisure_count_3000 | sport_count_3000 | market_count_3000 | green_part_5000 | prom_part_5000 | office_count_5000 | office_sqm_5000 | trc_count_5000 | trc_sqm_5000 | cafe_count_5000 | cafe_sum_5000_min_price_avg | cafe_sum_5000_max_price_avg | cafe_avg_price_5000 | cafe_count_5000_na_price | cafe_count_5000_price_500 | cafe_count_5000_price_1000 | cafe_count_5000_price_1500 | cafe_count_5000_price_2500 | cafe_count_5000_price_4000 | cafe_count_5000_price_high | big_church_count_5000 | church_count_5000 | mosque_count_5000 | leisure_count_5000 | sport_count_5000 | market_count_5000 | price_doc | year | month | day | life_sqnan | floornan | max_floornan | materialnan | build_yearnan | num_roomnan | kitch_sqnan | statenan | preschool_quotanan | school_quotanan | hospital_beds_raionnan | raion_build_count_with_material_infonan | build_count_blocknan | build_count_woodnan | build_count_framenan | build_count_bricknan | build_count_monolithnan | build_count_panelnan | build_count_foamnan | build_count_slagnan | build_count_mixnan | raion_build_count_with_builddate_infonan | build_count_before_1920nan | build_count_1921-1945nan | build_count_1946-1970nan | build_count_1971-1995nan | build_count_after_1995nan | metro_min_walknan | metro_km_walknan | railroad_station_walk_kmnan | railroad_station_walk_minnan | ID_railroad_station_walknan | cafe_sum_500_min_price_avgnan | cafe_sum_500_max_price_avgnan | cafe_avg_price_500nan | cafe_sum_1000_min_price_avgnan | cafe_sum_1000_max_price_avgnan | cafe_avg_price_1000nan | cafe_sum_1500_min_price_avgnan | cafe_sum_1500_max_price_avgnan | cafe_avg_price_1500nan | cafe_sum_2000_min_price_avgnan | cafe_sum_2000_max_price_avgnan | cafe_avg_price_2000nan | cafe_sum_3000_min_price_avgnan | cafe_sum_3000_max_price_avgnan | cafe_avg_price_3000nan | prom_part_5000nan | cafe_sum_5000_min_price_avgnan | cafe_sum_5000_max_price_avgnan | cafe_avg_price_5000nan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 1.324738 | 1.192660 | 0.326634 | 0.910235 | 1.0 | 2.026877 | 2.0 | 0.583198 | 2.0 | 1 | 16 | 2.751939 | 2.481138 | NaN | NaN | 2.215612 | 2.142110 | 5 | 2.223625 | 2.231255 | 5 | 0 | 1.701222 | 1 | 0 | 7 | 3 | 0 | 0 | 16 | -inf | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 2.430494 | 2.361655 | 2.373089 | 2.298535 | 2.230690 | 2.221910 | 2.441898 | 2.385483 | 2.373498 | 2.351101 | 2.226430 | 2.317627 | 2.215612 | 2.139688 | 2.134215 | 2.223625 | 2.152431 | 2.138407 | 2.309474 | 2.242433 | 2.233670 | 2.279359 | 2.213876 | 2.196949 | 2.285827 | 2.217115 | 2.208148 | 1.677444 | 1.169032 | NaN | 0.0 | NaN | -0.366513 | 1.651527 | 0.0 | 0.0 | 0.0 | 5.351858 | NaN | NaN | NaN | 1.672953 | 0.475885 | -inf | -0.049452 | -2.092876 | 0.958675 | -2.092876 | NaN | NaN | -0.262074 | NaN | -2.553283 | 1.152083 | -0.527503 | 0.254759 | 0.524774 | 1.429110 | -inf | 0.524774 | 0.658750 | -inf | NaN | 0.177199 | NaN | 1 | -1.043160 | 0.871489 | 0.944940 | 0.961500 | 1.000047 | -1.043160 | -inf | 0 | 0.294990 | 0.475885 | -1.323037 | 1 | 0.976629 | 101 | 1.160072 | 1 | 1.064297 | 0.556015 | -1.657353 | -2.802852 | 0.565480 | 0.378781 | 0.867461 | -0.660556 | NaN | 0.113385 | -2.280919 | 0.740911 | 0.229102 | -0.136648 | 0.369117 | NaN | 0.644140 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.314196 | 0.971854 | 0.693177 | 0.667466 | ... | -inf | 6 | 1 | 0.977634 | 0.659805 | 0.094048 | 2.359478 | 2.197225 | 2.489220 | 1.260266 | 1.846844 | 1.928181 | 1.893718 | -inf | 0.970422 | 0.874591 | 0.583198 | -0.366513 | NaN | 0 | -inf | -0.366513 | 0 | NaN | 0.665730 | 1 | 0.902416 | 1.019104 | 0.787195 | 2.497225 | 1.079918 | 2.641523 | 1.276345 | 1.859491 | 1.938702 | 1.905044 | -inf | 0.996229 | 0.874591 | 0.583198 | -0.366513 | -inf | 0 | -inf | -0.366513 | 0 | NaN | 0.834032 | 1 | 0.909564 | 0.957965 | 0.910235 | 2.520548 | 1.142787 | 2.650817 | 1.439718 | 1.865779 | 1.943641 | 1.910486 | 0.475885 | 1.113344 | 1.128508 | 1.019781 | 0.094048 | -inf | 0 | -0.366513 | 0.326634 | 0 | NaN | 1.113344 | 1 | 0.944625 | 0.951085 | 1.214110 | 2.610184 | 1.374030 | 2.722013 | 1.614203 | 1.881486 | 1.957005 | 1.924752 | 0.910235 | 1.298436 | 1.353565 | 1.305323 | 0.787195 | 0.326634 | NaN | 0.941939 | 1.128508 | 1 | NaN | 1.374030 | 4 | 15.581952 | 2011 | 8 | 2.995732 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0.693147 | 1.260266 | 1.079918 | 0.094048 | 0.910235 | 1.0 | 2.026877 | 2.0 | 0.583198 | 2.0 | 1 | 17 | 2.777338 | 2.455799 | NaN | NaN | 2.178877 | 2.085084 | 5 | 2.192392 | 2.167710 | 8 | 0 | 1.692624 | 1 | 0 | 6 | 1 | 1 | 1 | 3 | NaN | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 2.419676 | 2.345643 | 2.365318 | 2.268318 | 2.194752 | 2.193009 | 2.412248 | 2.349537 | 2.346678 | 2.331153 | 2.212015 | 2.293716 | 2.178877 | 2.098111 | 2.096254 | 2.192392 | 2.112760 | 2.110919 | 2.280474 | 2.208797 | 2.205116 | 2.264538 | 2.189649 | 2.189974 | 2.254158 | 2.179679 | 2.177552 | 1.704977 | 1.485877 | -inf | 0.0 | 1.436201 | 0.326634 | 1.504035 | 0.0 | 0.0 | 0.0 | 5.497168 | -inf | -inf | 1.601979 | 1.488584 | 0.996229 | -0.366513 | NaN | NaN | 0.708459 | NaN | NaN | NaN | NaN | NaN | NaN | -1.288377 | 0.430446 | 0.768902 | 0.204814 | 1.311626 | -0.366513 | 0.256556 | 0.433886 | -0.366513 | NaN | NaN | NaN | 1 | 0.811663 | 0.124596 | 0.622296 | 0.739976 | 0.771542 | 0.058597 | -0.366513 | 0 | 0.124596 | 0.326634 | NaN | 1 | 0.799227 | 32 | 0.554769 | 2 | 0.788942 | 0.223054 | 0.002207 | -1.513865 | 0.206621 | NaN | 0.659100 | 0.207842 | NaN | -0.366402 | 0.785818 | 0.594827 | -1.898586 | -0.068666 | 0.931238 | -0.940322 | -0.483586 | NaN | NaN | NaN | -2.661836 | NaN | NaN | NaN | 0.455672 | 0.653006 | NaN | -0.152953 | ... | 1.386294 | 2 | 0 | 1.121498 | 0.714183 | 0.094048 | 2.445959 | 1.945910 | 2.464062 | 1.041412 | 1.878341 | 1.959385 | 1.925192 | NaN | 0.583198 | 0.665730 | -inf | -0.366513 | -inf | 0 | -inf | 0.475885 | 0 | 0.326634 | 0.787195 | 0 | 1.133890 | 1.084348 | 0.326634 | 2.486305 | 0.732099 | 2.492834 | 1.113344 | 1.878588 | 1.957572 | 1.924091 | NaN | 0.665730 | 0.732099 | 0.094048 | -0.366513 | -inf | 0 | -inf | 0.475885 | 0 | 0.326634 | 0.874591 | 0 | 1.062727 | 1.196229 | 0.910235 | 2.611501 | 0.970422 | 2.573021 | 1.224128 | 1.863670 | 1.944545 | 1.910331 | -inf | 0.874591 | 0.874591 | 0.326634 | -0.366513 | -inf | 0 | -inf | 0.665730 | 0 | 0.583198 | 1.079918 | 1 | 0.845118 | 1.197883 | 1.432618 | 2.694980 | 1.305323 | 2.675939 | 1.644061 | 1.873792 | 1.952528 | 1.919109 | 0.787195 | 1.358877 | 1.428968 | 1.276345 | 0.996229 | 0.094048 | NaN | 0.996229 | 1.214110 | 1 | 0.834032 | 1.432618 | 14 | 15.607270 | 2011 | 8 | 3.135494 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1.098612 | 1.324738 | 1.214110 | -0.366513 | 0.910235 | 1.0 | 2.026877 | 2.0 | 0.583198 | 2.0 | 1 | 16 | 2.733448 | 2.444940 | NaN | NaN | 2.160923 | 1.986263 | 4 | 2.167159 | 2.154890 | 7 | 0 | 1.956682 | 1 | 0 | 5 | 1 | 0 | 0 | 0 | -inf | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 2.445174 | 2.373793 | 2.391642 | 2.248642 | 2.178134 | 2.166900 | 2.403066 | 2.338688 | 2.337964 | 2.316315 | 2.190210 | 2.280376 | 2.160923 | 2.084123 | 2.070860 | 2.167159 | 2.090906 | 2.077625 | 2.262600 | 2.193280 | 2.181843 | 2.289812 | 2.209369 | 2.224118 | 2.233053 | 2.161645 | 2.149694 | 1.757701 | 1.405493 | NaN | 0.0 | 1.672953 | 0.326634 | 1.409607 | 0.0 | 1.0 | 0.0 | 5.799093 | -inf | NaN | 1.705717 | 1.421453 | 1.097189 | 0.094048 | -0.285169 | -0.706286 | 1.048609 | -0.997573 | NaN | NaN | NaN | NaN | NaN | 0.464875 | 0.197371 | 0.910117 | -1.406380 | 1.004278 | 0.094048 | -1.406380 | -0.632122 | 0.094048 | NaN | 0.316610 | NaN | 1 | 0.544429 | 0.071524 | 0.663032 | 0.735344 | 0.790609 | NaN | 0.094048 | 0 | 0.071524 | 0.326634 | NaN | 1 | 0.813394 | 5 | 0.643770 | 3 | 0.561872 | 0.701008 | NaN | -0.752118 | 0.265114 | 0.239312 | 0.559394 | -1.143268 | NaN | -1.539213 | -0.382216 | NaN | -0.401599 | NaN | 0.712420 | NaN | NaN | -0.886323 | -0.925214 | -0.835263 | NaN | NaN | 0.145388 | NaN | 0.727386 | 0.373192 | 0.139274 | 0.470657 | ... | -inf | 5 | 3 | 0.830538 | 0.645309 | NaN | NaN | 0.000000 | 2.062202 | 0.970422 | 1.832166 | 1.919969 | 1.883216 | -0.366513 | 0.326634 | 0.583198 | -0.366513 | NaN | NaN | 0 | NaN | 0.326634 | 0 | NaN | 0.583198 | 5 | 0.941639 | 0.934339 | 0.326634 | 2.443644 | 0.665730 | 2.385963 | 1.156269 | 1.845998 | 1.929311 | 1.894162 | -0.366513 | 0.732099 | 0.787195 | 0.326634 | -inf | NaN | 0 | NaN | 0.326634 | 0 | NaN | 0.732099 | 5 | 0.914892 | 1.186511 | 0.732099 | 2.452382 | 0.665730 | 2.385963 | 1.311994 | 1.879071 | 1.957789 | 1.924402 | -0.366513 | 0.787195 | 1.041412 | 0.787195 | 0.094048 | -inf | 0 | NaN | 0.874591 | 0 | NaN | 1.097189 | 6 | 0.961901 | 1.122253 | 1.324738 | 2.653686 | 1.268453 | 2.658054 | 1.569453 | 1.880213 | 1.958276 | 1.925122 | 0.834032 | 1.214110 | 1.336753 | 1.169032 | 0.834032 | 0.094048 | NaN | 0.874591 | 1.192660 | 0 | 0.326634 | 1.436201 | 10 | 15.555977 | 2011 | 8 | 3.295837 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 1.386294 | 1.501549 | 1.364055 | 0.787195 | 0.910235 | 1.0 | 2.026877 | 2.0 | 0.583198 | 2.0 | 1 | 18 | 2.794099 | 2.492560 | NaN | NaN | 2.249118 | 2.178200 | 9 | 2.253706 | 2.276720 | 10 | 0 | 1.931189 | 1 | 0 | 17 | 6 | 0 | 0 | 11 | 0.326634 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 2.298540 | 2.218441 | 2.233765 | 2.328240 | 2.261163 | 2.255328 | 2.459454 | 2.398147 | 2.398601 | 2.331476 | 2.209829 | 2.295111 | 2.249118 | 2.174936 | 2.171405 | 2.253706 | 2.182844 | 2.173194 | 2.339443 | 2.273352 | 2.267146 | 2.091772 | 1.985037 | 2.017381 | 2.314908 | 2.246628 | 2.241268 | 1.812684 | 0.787195 | 1.369104 | 12.0 | 1.572832 | 1.364055 | 1.668330 | 0.0 | 9.0 | 2.0 | 6.129050 | 0.941939 | 1.156269 | 1.305323 | 1.582588 | 1.710085 | 0.326634 | -0.920957 | NaN | 0.895287 | NaN | NaN | NaN | NaN | NaN | NaN | 1.011161 | -0.354319 | 0.978887 | 0.376119 | 1.371568 | 0.326634 | 0.292083 | 0.508170 | 0.326634 | NaN | -0.783301 | -1.700447 | 1 | -0.015109 | 0.986364 | 1.050735 | 1.067265 | 1.088477 | -0.015109 | -inf | 0 | 0.022361 | 1.041412 | -0.367043 | 1 | 1.051159 | 83 | 0.645668 | 1 | 1.200047 | 0.812556 | 0.614241 | -0.562769 | 0.881276 | NaN | 1.202481 | NaN | NaN | -0.403479 | 0.625516 | 0.692908 | 0.466303 | 0.236512 | 0.776386 | -0.258983 | 0.870897 | NaN | NaN | NaN | NaN | NaN | -3.464701 | -0.808221 | 1.003526 | 1.041218 | 1.020716 | 0.479558 | ... | -inf | 3 | 1 | 1.207671 | 0.632608 | -0.366513 | 2.230622 | 1.945910 | 2.433780 | 1.142787 | 1.873815 | 1.950237 | 1.917629 | NaN | 0.475885 | 0.787195 | 0.732099 | -inf | NaN | 0 | -inf | NaN | 0 | NaN | 0.787195 | 2 | 1.245525 | 0.557165 | -0.366513 | 2.230622 | 0.665730 | 2.433780 | 1.169032 | 1.870608 | 1.948918 | 1.915631 | NaN | 0.475885 | 0.874591 | 0.732099 | -inf | NaN | 0 | -inf | -inf | 0 | NaN | 0.941939 | 2 | 1.110037 | 0.241035 | 0.326634 | 2.487050 | 0.910235 | 2.504255 | 1.242925 | 1.883657 | 1.960881 | 1.928038 | NaN | 0.475885 | 0.970422 | 0.834032 | 0.094048 | NaN | 0 | -inf | -0.366513 | 0 | NaN | 1.061385 | 3 | 0.975251 | 0.306314 | 0.732099 | 2.518148 | 1.128508 | 2.621472 | 1.413636 | 1.922332 | 1.994388 | 1.963549 | 0.326634 | 0.665730 | 1.113344 | 0.996229 | 0.874591 | -0.366513 | -inf | 0.326634 | 0.326634 | 0 | NaN | 1.181143 | 3 | 16.388123 | 2011 | 9 | 0.000000 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 1.609438 | 1.468751 | 1.468751 | 0.326634 | 0.910235 | 1.0 | 2.026877 | 2.0 | 0.583198 | 2.0 | 1 | 16 | 2.769055 | 2.450269 | NaN | NaN | 2.157475 | 2.089804 | 7 | 2.176682 | 2.192550 | 9 | 0 | 1.845537 | 4 | 2 | 25 | 2 | 0 | 0 | 10 | 1.511296 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 2.326920 | 2.252562 | 2.261000 | 2.251357 | 2.183365 | 2.167195 | 2.409455 | 2.345744 | 2.344535 | 2.321864 | 2.203340 | 2.283302 | 2.157475 | 2.079485 | 2.068110 | 2.176682 | 2.104903 | 2.083680 | 2.265037 | 2.197360 | 2.182813 | 2.145869 | 2.061368 | 2.061269 | 2.236598 | 2.167453 | 2.151254 | 1.889298 | 1.353565 | NaN | 0.0 | 1.866580 | 1.019781 | 1.268453 | 0.0 | 3.0 | 1.0 | 6.614726 | 1.777695 | 1.555235 | 1.606154 | 1.417583 | 1.378840 | 0.475885 | -1.474574 | NaN | 0.747724 | NaN | NaN | NaN | NaN | NaN | NaN | 0.862343 | 0.074408 | 0.906994 | NaN | 0.844595 | 0.475885 | -0.760491 | -0.263463 | 1.553373 | NaN | NaN | NaN | 1 | 0.897083 | -0.609928 | NaN | NaN | -0.054167 | -0.609928 | 0.326634 | 0 | 0.132922 | 0.834032 | NaN | 0 | -0.760491 | 113 | -1.041095 | 4 | 0.628203 | 0.770102 | -0.705888 | 0.254611 | 0.420962 | -0.041731 | 0.794939 | -0.388755 | NaN | -0.068267 | 0.322175 | 0.249915 | NaN | -0.473102 | 0.284928 | -2.164146 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -0.051839 | -2.238699 | -0.531273 | -1.229598 | ... | 1.791759 | 7 | 0 | 0.347732 | 0.454158 | 1.511296 | 2.638648 | 2.197225 | 2.565553 | 1.723802 | 1.893447 | 1.966961 | 1.935473 | 1.079918 | 1.446565 | 1.459560 | 1.453174 | 1.224128 | 0.583198 | 1 | 1.061385 | 1.224128 | 0 | 0.834032 | 0.970422 | 2 | 0.412588 | 0.478362 | 1.610227 | 2.660336 | 1.041412 | 2.583568 | 1.821321 | 1.893276 | 1.966576 | 1.935164 | 1.203634 | 1.582588 | 1.581000 | 1.584161 | 1.364055 | 0.970422 | 1 | 1.268453 | 1.413636 | 0 | 1.041412 | 1.113344 | 3 | 0.483269 | 0.767366 | 1.744023 | 2.711073 | 1.409607 | 2.684239 | 1.942123 | 1.909364 | 1.981231 | 1.950402 | 1.421453 | 1.719815 | 1.720487 | 1.717098 | 1.610227 | 1.397 | 4 | 1.446565 | 1.567739 | 1 | 1.305323 | 1.468751 | 5 | 0.754171 | 0.871543 | 1.877209 | 2.769101 | 1.555235 | 2.712649 | 2.045529 | 1.909511 | 1.981329 | 1.950517 | 1.601979 | 1.846657 | 1.849961 | 1.842698 | 1.751838 | 1.543753 | 1.041412 | 1.590311 | 1.698150 | 2 | 1.506488 | 1.662599 | 14 | 16.608603 | 2011 | 9 | 1.609438 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 345 columns
scaling_feature=[feature for feature in train_data.columns if feature not in ['id','Price_doc'] ]
len(scaling_feature)
344
train_data.fillna(0,inplace=True)
[min(train_data[i]) for i in train_data.columns]
[0.0, -inf, -inf, -inf, -inf, 1.0, -inf, 0.0, -inf, 1.0, 0, 0, 2.6774989428536276, 2.0595294527304104, 0.0, 0.0, 1.641863663944111, 0.0, 0, 1.633928354228994, 1.9343700821476857, 0, 0, 0.0, 0, 0, 0, 0, 0, 0, 0, -inf, 0, 0, 0, 0, 0, 0, 0, 0, 2.0595294527304104, 1.9596328997183297, 1.974243635478386, 1.7749349530624492, 1.6566548425881231, 1.6440614850208792, 2.0012332255705867, 1.9110842330081819, 1.894269181238364, 1.8415452149350158, 1.61935972946482, 1.7873848498299765, 1.641863663944111, 1.50648771329857, 1.4912513280732116, 1.633928354228994, 1.4964725796851872, 1.4831304777411851, 1.7948535467124367, 1.6800782821876006, 1.6654905987582682, 1.8491427716078928, 1.7457329447065377, 1.710800555208106, 1.7534605989414096, 1.631588328253238, 1.61935972946482, -inf, -inf, -inf, 0.0, -inf, -inf, 0.0, 0.0, 0.0, 0.0, 0.0, -inf, -inf, -inf, -inf, -inf, -inf, -7.960598786346565, -9.839206968048753, -5.756901923145519, -9.839206968048753, -6.409107837255395, -7.363242555871257, -9.895921628063967, -5.71074990895329, -10.516654798246318, -5.596157089002031, -9.152124461512093, -5.301011695861906, -7.979591133121632, -2.364158535275807, -inf, -6.568681126087842, -8.677846244204565, -inf, -5.552340986880593, -7.999832843634663, -9.853923978622282, 0, -8.075394215172222, -6.386686949460646, -5.438501585932792, -4.987595439069698, -3.953089322356667, -8.075384575035306, -inf, 0, -8.199709904254176, -inf, -10.516654798246318, 0, -4.567625782444035, 5, -7.835815371846494, 1, -4.691567903246553, -6.791690943582811, -8.512639496896666, -7.87623202024998, -5.4671174105889655, -7.891259963750091, -6.767786208227923, -8.924018212329587, -8.237162889211437, -8.617352647072467, -13.60120657469366, -6.306148670262413, -7.819587781838355, -10.42407538206976, -6.162857867528065, -10.311111584483152, -6.34014968366858, -8.100425642082211, -8.375370596575328, -9.921350268163124, -10.487614911910729, -7.363242555871257, -8.55392245119615, -8.207960479608374, -7.777596203159332, -7.082120440593117, -7.298102385579864, -7.115961934607281, -7.596311363141728, 0, -inf, -inf, -inf, 0.0, 0, 0.0, -inf, 1.7411295468056076, 1.8269026656007323, 1.790335880924894, 0, -inf, -inf, -inf, 0, 0, 0, 0, 0, 0, 0, 0, 0, -inf, -inf, -inf, 0.0, 0, 0.0, -inf, 1.7411295468056076, 1.8269026656007323, 1.790335880924894, -inf, -inf, -inf, -inf, -inf, -inf, 0, 0, -inf, 0, -inf, 0, 0, -4.610149476789775, -inf, -inf, 0.0, -inf, 0.0, -inf, 1.7411295468056076, 1.8269026656007323, 1.790335880924894, -inf, -inf, -inf, -inf, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, -2.5644559444400956, -inf, -inf, 0.0, -inf, 0.0, -inf, 1.7411295468056076, 1.8269026656007323, 1.790335880924894, -inf, -inf, -inf, -inf, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, -1.9678147153196068, -inf, -inf, 0.0, -inf, 0.0, -inf, 1.7411295468056076, 1.8269026656007323, 1.790335880924894, -inf, -inf, -inf, -inf, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, 0.22988953758400052, -inf, -inf, 0.0, -inf, 0.0, -inf, 1.7411295468056076, 1.8269026656007323, 1.790335880924894, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, 0, -inf, -inf, 0, 11.512925464970229, 2011, 1, 0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
for i in train_data.columns:
train_data.loc[(train_data[i]==min(train_data[i])),i]=np.quantile(train_data[i],1)
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
train_data.loc[:,scaling_feature] = scaler.fit_transform(train_data.loc[:,scaling_feature])
scaling_feature_test=[feature for feature in test_data.columns if feature not in ['id'] ]
len(scaling_feature_test)
339
for i in test_data.columns:
test_data.loc[(test_data[i]==min(test_data[i])),i]=np.quantile(test_data[i],1)
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
test_data.loc[:,scaling_feature_test] = scaler.fit_transform(test_data.loc[:,scaling_feature_test])
test_data.head()
| id | full_sq | life_sq | floor | max_floor | material | build_year | num_room | kitch_sq | state | product_type | sub_area | area_m | raion_popul | green_zone_part | indust_part | children_preschool | preschool_quota | preschool_education_centers_raion | children_school | school_quota | school_education_centers_raion | school_education_centers_top_20_raion | hospital_beds_raion | healthcare_centers_raion | university_top_20_raion | sport_objects_raion | additional_education_raion | culture_objects_top_25 | culture_objects_top_25_raion | shopping_centers_raion | office_raion | thermal_power_plant_raion | incineration_raion | oil_chemistry_raion | radiation_raion | railroad_terminal_raion | big_market_raion | nuclear_reactor_raion | detention_facility_raion | full_all | male_f | female_f | young_all | young_male | young_female | work_all | work_male | work_female | ekder_all | ekder_male | ekder_female | 0_6_all | 0_6_male | 0_6_female | 7_14_all | 7_14_male | 7_14_female | 0_17_all | 0_17_male | 0_17_female | 16_29_all | 16_29_male | 16_29_female | 0_13_all | 0_13_male | 0_13_female | raion_build_count_with_material_info | build_count_block | build_count_wood | build_count_frame | build_count_brick | build_count_monolith | build_count_panel | build_count_foam | build_count_slag | build_count_mix | raion_build_count_with_builddate_info | build_count_before_1920 | build_count_1921-1945 | build_count_1946-1970 | build_count_1971-1995 | build_count_after_1995 | ID_metro | metro_min_avto | metro_km_avto | metro_min_walk | metro_km_walk | kindergarten_km | school_km | park_km | green_zone_km | industrial_km | water_treatment_km | cemetery_km | incineration_km | railroad_station_walk_km | railroad_station_walk_min | ID_railroad_station_walk | railroad_station_avto_km | railroad_station_avto_min | ID_railroad_station_avto | public_transport_station_km | public_transport_station_min_walk | water_km | water_1line | mkad_km | ttk_km | sadovoe_km | bulvar_ring_km | kremlin_km | big_road1_km | ID_big_road1 | big_road1_1line | big_road2_km | ID_big_road2 | railroad_km | railroad_1line | zd_vokzaly_avto_km | ID_railroad_terminal | bus_terminal_avto_km | ID_bus_terminal | oil_chemistry_km | nuclear_reactor_km | radiation_km | power_transmission_line_km | thermal_power_plant_km | ts_km | big_market_km | market_shop_km | fitness_km | swim_pool_km | ice_rink_km | stadium_km | basketball_km | hospice_morgue_km | detention_facility_km | public_healthcare_km | university_km | workplaces_km | shopping_centers_km | office_km | additional_education_km | preschool_km | big_church_km | church_synagogue_km | mosque_km | theater_km | museum_km | exhibition_km | ... | cafe_count_1000_price_4000 | cafe_count_1000_price_high | big_church_count_1000 | church_count_1000 | mosque_count_1000 | leisure_count_1000 | sport_count_1000 | market_count_1000 | green_part_1500 | prom_part_1500 | office_count_1500 | office_sqm_1500 | trc_count_1500 | trc_sqm_1500 | cafe_count_1500 | cafe_sum_1500_min_price_avg | cafe_sum_1500_max_price_avg | cafe_avg_price_1500 | cafe_count_1500_na_price | cafe_count_1500_price_500 | cafe_count_1500_price_1000 | cafe_count_1500_price_1500 | cafe_count_1500_price_2500 | cafe_count_1500_price_4000 | cafe_count_1500_price_high | big_church_count_1500 | church_count_1500 | mosque_count_1500 | leisure_count_1500 | sport_count_1500 | market_count_1500 | green_part_2000 | prom_part_2000 | office_count_2000 | office_sqm_2000 | trc_count_2000 | trc_sqm_2000 | cafe_count_2000 | cafe_sum_2000_min_price_avg | cafe_sum_2000_max_price_avg | cafe_avg_price_2000 | cafe_count_2000_na_price | cafe_count_2000_price_500 | cafe_count_2000_price_1000 | cafe_count_2000_price_1500 | cafe_count_2000_price_2500 | cafe_count_2000_price_4000 | cafe_count_2000_price_high | big_church_count_2000 | church_count_2000 | mosque_count_2000 | leisure_count_2000 | sport_count_2000 | market_count_2000 | green_part_3000 | prom_part_3000 | office_count_3000 | office_sqm_3000 | trc_count_3000 | trc_sqm_3000 | cafe_count_3000 | cafe_sum_3000_min_price_avg | cafe_sum_3000_max_price_avg | cafe_avg_price_3000 | cafe_count_3000_na_price | cafe_count_3000_price_500 | cafe_count_3000_price_1000 | cafe_count_3000_price_1500 | cafe_count_3000_price_2500 | cafe_count_3000_price_4000 | cafe_count_3000_price_high | big_church_count_3000 | church_count_3000 | mosque_count_3000 | leisure_count_3000 | sport_count_3000 | market_count_3000 | green_part_5000 | prom_part_5000 | office_count_5000 | office_sqm_5000 | trc_count_5000 | trc_sqm_5000 | cafe_count_5000 | cafe_sum_5000_min_price_avg | cafe_sum_5000_max_price_avg | cafe_avg_price_5000 | cafe_count_5000_na_price | cafe_count_5000_price_500 | cafe_count_5000_price_1000 | cafe_count_5000_price_1500 | cafe_count_5000_price_2500 | cafe_count_5000_price_4000 | cafe_count_5000_price_high | big_church_count_5000 | church_count_5000 | mosque_count_5000 | leisure_count_5000 | sport_count_5000 | market_count_5000 | year | month | day | life_sqnan | build_yearnan | statenan | preschool_quotanan | school_quotanan | hospital_beds_raionnan | raion_build_count_with_material_infonan | build_count_blocknan | build_count_woodnan | build_count_framenan | build_count_bricknan | build_count_monolithnan | build_count_panelnan | build_count_foamnan | build_count_slagnan | build_count_mixnan | raion_build_count_with_builddate_infonan | build_count_before_1920nan | build_count_1921-1945nan | build_count_1946-1970nan | build_count_1971-1995nan | build_count_after_1995nan | metro_min_walknan | metro_km_walknan | railroad_station_walk_kmnan | railroad_station_walk_minnan | ID_railroad_station_walknan | cafe_sum_500_min_price_avgnan | cafe_sum_500_max_price_avgnan | cafe_avg_price_500nan | cafe_sum_1000_min_price_avgnan | cafe_sum_1000_max_price_avgnan | cafe_avg_price_1000nan | cafe_sum_1500_min_price_avgnan | cafe_sum_1500_max_price_avgnan | cafe_avg_price_1500nan | green_part_2000nan | cafe_sum_2000_min_price_avgnan | cafe_sum_2000_max_price_avgnan | cafe_avg_price_2000nan | cafe_sum_3000_min_price_avgnan | cafe_sum_3000_max_price_avgnan | cafe_avg_price_3000nan | prom_part_5000nan | cafe_sum_5000_min_price_avgnan | cafe_sum_5000_max_price_avgnan | cafe_avg_price_5000nan | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 38135 | -0.720188 | -0.425838 | -0.877825 | -0.581216 | 0.531818 | 0.187170 | 1.295149 | -0.222979 | 0.146649 | 0.0 | 0.0 | 0.362943 | 1.579349 | -0.496943 | -0.722049 | 2.249692 | 4.116125 | 1.104928 | 2.321928 | 4.294872 | 1.307487 | -3.093263 | -0.148247 | -1.300880 | 0.326089 | 0.154780 | -0.375633 | 0.0 | 0.265746 | -0.539118 | -0.682446 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.124467 | -0.121389 | -0.127027 | 2.289409 | 2.386136 | 2.181858 | 1.758536 | 1.635214 | 1.873301 | 0.456361 | 0.326359 | 0.512294 | 2.249692 | 2.358689 | 2.129094 | 2.321928 | 2.413747 | 2.216802 | 2.281233 | 2.374503 | 2.177963 | -0.161461 | -0.166827 | -0.153458 | 2.289390 | 2.396009 | 2.170917 | 3.698658 | 2.062212 | 0.705857 | -1.693016 | 0.391978 | 2.414051 | 2.940625 | -5.415450 | 0.782686 | -1.051917 | 3.698107 | -1.181503 | 1.074286 | 1.343795 | 0.275823 | 3.635613 | -0.466868 | -0.553255 | -0.479885 | -0.435708 | -0.435708 | -0.367592 | -0.192589 | -0.233743 | -0.758421 | -0.065234 | -1.636107 | -0.864453 | -0.146032 | 0.171800 | 0.171800 | -0.018409 | 0.155498 | 0.153153 | -0.191873 | -0.215017 | -0.215017 | -0.937407 | 0.0 | -0.032554 | 1.051621 | 1.082121 | 1.102163 | 0.678459 | 1.866258 | -1.030354 | 0.0 | 1.273101 | 0.566921 | -0.611995 | 0.0 | 1.068773 | -1.078169 | -0.256843 | 0.253219 | 0.425039 | 0.738345 | 0.345405 | 0.131329 | 0.287966 | 0.808903 | 0.242382 | 0.413551 | -0.467709 | -0.054884 | 1.073371 | 1.493166 | 0.745606 | 0.104272 | 0.962672 | -0.378084 | 1.490326 | 0.654183 | 0.257094 | 0.930275 | -0.363237 | -0.197951 | -0.473754 | -0.207520 | -1.046394 | 1.402244 | 1.142573 | 1.296132 | ... | 0.373905 | 0.224449 | -1.516908 | -0.951125 | 0.0 | 0.457361 | -0.698101 | 0.614724 | -0.007653 | -0.923969 | 1.296533 | 1.314706 | 1.740068 | 1.765686 | -0.469029 | 0.909483 | 0.538555 | 0.684330 | 1.060684 | 1.504421 | 2.105188 | -0.710710 | 1.082391 | 0.537703 | 0.272655 | -1.001678 | -0.491847 | 0.0 | 0.633197 | -0.762432 | 0.887953 | -0.430031 | -0.983962 | 1.533960 | 1.540772 | 2.092902 | 2.303388 | -0.397717 | 0.753228 | 0.420868 | 0.550597 | 1.332940 | 1.994457 | 2.341679 | -0.532825 | 1.337040 | 0.749555 | 0.316944 | -0.807555 | -0.390294 | 0.0 | 0.773571 | -0.735301 | 1.110466 | -0.637304 | -1.043258 | 1.871263 | 1.824614 | -0.875123 | -0.841788 | -0.379226 | 0.179678 | -0.085962 | 0.017290 | -0.738505 | -0.546056 | -0.502142 | -0.415542 | 1.746133 | 1.096697 | 0.414079 | -0.661712 | -0.391445 | -2.416831 | 1.030214 | -0.831028 | 1.485452 | -0.145386 | -1.056496 | -0.562970 | -0.677949 | -1.002645 | -0.983062 | -0.518350 | -0.650138 | -0.893287 | -0.799249 | -0.650806 | -0.545564 | -0.591706 | -0.488643 | 3.044759 | 2.041240 | 0.548158 | -0.566298 | -0.475511 | -1.587810 | 1.440184 | -0.936058 | -1.271634 | 0.0 | -0.071315 | 1.644608 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 30475 | 1.242129 | -0.083525 | -0.213539 | 0.339382 | 0.531818 | 0.255150 | -0.694237 | -0.241597 | 1.313446 | 0.0 | 0.0 | 0.335827 | -1.483901 | 1.515007 | -0.945628 | -1.338624 | -0.130403 | 1.623708 | -1.348669 | -0.250098 | 1.552875 | 0.323283 | -0.148247 | 1.124855 | 0.326089 | 1.832181 | 1.685920 | 0.0 | 0.265746 | -0.931349 | 1.550957 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.437313 | -0.438222 | -0.436383 | -1.349119 | -1.335979 | -1.360330 | -1.460973 | -1.468277 | -1.445139 | -1.498586 | -1.460280 | -1.507933 | -1.338624 | -1.322969 | -1.352238 | -1.348669 | -1.338617 | -1.354803 | -1.356053 | -1.344436 | -1.365700 | -0.438010 | -0.438158 | -0.438357 | -1.349680 | -1.336207 | -1.360952 | -0.319273 | -0.340464 | -0.924861 | 0.530194 | -0.454359 | -0.449821 | -0.351987 | 0.189085 | 0.782686 | 0.467651 | -0.318451 | 0.793199 | -0.767822 | -0.274819 | -0.338500 | -0.365554 | -0.894191 | -0.072462 | -0.029076 | -0.012167 | -0.012167 | 0.030438 | -0.057449 | 0.367951 | 4.013384 | -0.325963 | 0.500734 | 0.019043 | 0.958208 | 0.338201 | 0.338201 | -0.500349 | 0.315782 | 0.293118 | -0.599768 | 0.215116 | 0.215116 | 0.398599 | 0.0 | 0.052287 | 0.732605 | 0.722609 | 0.734415 | 0.413785 | 0.266975 | -0.339099 | 0.0 | 0.592906 | -0.054341 | 0.285373 | 0.0 | 1.056135 | -0.559623 | 0.833124 | -0.015204 | 1.062965 | 0.848426 | 0.665156 | 0.293251 | 0.466983 | -0.202339 | 0.195739 | 1.188617 | 0.018733 | 0.054386 | -0.326146 | 0.579614 | 0.609975 | 1.172162 | 1.081739 | 0.100419 | 0.813163 | 0.954658 | 0.051790 | 0.548483 | -0.044727 | -0.059619 | -0.191324 | 0.100051 | 0.814213 | 0.460533 | 0.431872 | -0.144518 | ... | 0.373905 | 0.224449 | 0.700431 | 1.111609 | 0.0 | 0.457361 | 1.577157 | 0.614724 | 2.436839 | -0.682165 | 1.296533 | 1.314706 | 1.740068 | 1.765686 | -0.464624 | 0.295729 | 0.538555 | 0.444234 | 1.060684 | 1.504421 | -0.561746 | 1.530591 | -0.987468 | 0.537703 | 0.272655 | 1.054297 | -0.533294 | 0.0 | 0.633197 | -1.050444 | 0.887953 | 2.010009 | -0.776539 | 1.533960 | 1.540772 | -0.928094 | -0.704428 | -0.384394 | -0.069406 | -0.041665 | -0.052826 | 1.332940 | -0.588202 | -0.517802 | -0.542439 | -0.807938 | 0.749555 | 0.316944 | -0.807555 | -0.390294 | 0.0 | 0.773571 | -0.988001 | 1.110466 | 1.182109 | -0.992953 | 1.871263 | 1.824614 | -0.933155 | -0.909670 | -0.384456 | -0.345776 | -0.175224 | -0.241776 | 1.587716 | -0.552448 | -0.471351 | -0.454169 | -0.649666 | 1.096697 | 0.414079 | -0.661712 | -0.328256 | 0.413765 | 1.030214 | -0.831028 | 1.485452 | 1.453722 | -0.560321 | -0.558772 | -0.638758 | -1.071682 | -1.040531 | -0.516717 | -0.247162 | -0.161538 | -0.195068 | -0.650806 | -0.551490 | -0.567129 | -0.515133 | -0.504662 | -0.592727 | 0.548158 | -0.543422 | -0.457269 | 0.629798 | -0.822869 | -0.971107 | -1.271634 | 0.0 | -0.071315 | 1.644608 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 30476 | -0.646967 | -0.270562 | -0.767110 | -1.041515 | -2.399684 | 0.064159 | -0.818574 | -0.232642 | -1.020148 | 0.0 | 0.0 | -0.347291 | 0.894814 | -0.903506 | 0.492785 | 0.253439 | -0.528036 | 0.067370 | 0.230682 | 0.003536 | -0.164841 | 0.323283 | 0.544834 | -0.330586 | 0.326089 | 0.154780 | 1.685920 | 0.0 | 0.265746 | -0.800605 | -0.633540 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.369586 | -0.374809 | -0.365058 | 0.242451 | 0.213153 | 0.273095 | 0.957094 | 0.861570 | 1.048313 | 1.048518 | 0.885588 | 1.116125 | 0.253439 | 0.220367 | 0.288118 | 0.230682 | 0.210944 | 0.250743 | 0.248909 | 0.221422 | 0.277602 | -0.363491 | -0.375945 | -0.351286 | 0.243925 | 0.216166 | 0.272868 | 0.451845 | 0.907438 | 1.206374 | 0.530194 | 0.439288 | -0.554917 | 0.590147 | 0.189085 | -1.335251 | 0.467651 | 0.452960 | 0.793199 | -0.749215 | 1.332822 | -0.236113 | -0.318665 | -0.484673 | -0.500450 | -0.415593 | -0.375304 | -0.375304 | -0.372301 | -0.320054 | -0.114906 | 0.495733 | -0.236976 | -0.108440 | 0.710064 | -0.197488 | -0.133154 | -0.133154 | 0.913342 | -0.074933 | -0.062160 | 0.351986 | -0.213475 | -0.213475 | 1.815171 | 0.0 | -0.548335 | -0.620620 | -0.623805 | -0.660199 | -0.573558 | -0.984264 | -0.527623 | 0.0 | 0.114717 | 1.696487 | 0.226541 | 0.0 | -0.607469 | 1.485754 | -0.321478 | -1.357320 | -1.526046 | 0.507795 | -0.616638 | -0.214814 | -0.571856 | -0.273547 | 0.340189 | -0.888731 | -0.136464 | -0.503585 | -0.574268 | -0.995849 | -0.548705 | -0.686932 | -0.697363 | -0.577568 | -0.261485 | -0.104027 | -0.045401 | -0.288713 | 0.126843 | -0.328428 | -0.274565 | 0.114893 | 0.399420 | -0.897984 | -0.809948 | -0.787819 | ... | 0.373905 | 0.224449 | 0.700431 | 1.111609 | 0.0 | -2.349895 | -0.271490 | -1.163312 | 0.668735 | -0.501730 | -0.851352 | -0.880011 | -0.885156 | 1.765686 | -0.394142 | -0.358901 | -0.379051 | -0.372144 | -1.021822 | -0.675329 | -0.497867 | -0.682517 | -0.967941 | 0.537703 | 0.272655 | -1.001678 | -0.533294 | 0.0 | -1.672948 | 0.005601 | -0.586660 | 1.411631 | -0.311998 | -0.697959 | -0.780531 | -0.745004 | -0.693237 | -0.314451 | -0.488785 | -0.458505 | -0.471186 | -0.828571 | -0.533252 | -0.423533 | -0.503980 | -0.795467 | 0.749555 | 0.316944 | -0.775026 | -0.390294 | 0.0 | -1.243136 | -0.166727 | -0.183291 | 1.648513 | -0.349047 | -0.602747 | -0.672130 | -0.701027 | -0.886902 | -0.313844 | -0.715637 | -0.602578 | -0.647572 | -0.738505 | -0.507702 | -0.371281 | -0.415542 | -0.640344 | 1.096697 | 0.414079 | -0.636512 | -0.391445 | 0.413765 | -0.942915 | -0.294473 | -0.298954 | 0.223358 | 0.410600 | -0.453828 | -0.568485 | -0.381312 | -0.368899 | -0.257125 | -0.703292 | -0.694517 | -0.698552 | -0.600535 | -0.261104 | -0.223047 | -0.263478 | -0.409503 | -0.537852 | 0.548158 | -0.360410 | -0.274848 | 0.629798 | -0.627028 | 0.062841 | 0.314596 | 0.0 | -0.071315 | 1.644608 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 30477 | 0.441582 | 0.114099 | 0.782889 | 0.339382 | 0.531818 | 0.245439 | -0.818574 | -0.095953 | 0.146649 | 0.0 | 0.0 | 0.158707 | -1.429040 | 0.202462 | -0.876407 | -1.282999 | -0.130403 | 1.623708 | -1.297228 | -0.250098 | 1.552875 | 0.323283 | -0.148247 | 1.124855 | 0.326089 | 1.832181 | -0.719225 | 0.0 | 0.265746 | 1.945014 | 1.550957 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.467691 | -0.469002 | -0.466432 | -1.295325 | -1.281853 | -1.306751 | -1.406720 | -1.411452 | -1.393801 | -1.446508 | -1.412660 | -1.454015 | -1.282999 | -1.267095 | -1.296988 | -1.297228 | -1.286981 | -1.303228 | -1.301647 | -1.289777 | -1.311887 | -0.466179 | -0.467595 | -0.465676 | -1.295813 | -1.282128 | -1.307431 | -0.319273 | -0.340464 | -0.924861 | 0.530194 | -0.454359 | -0.449821 | -0.351987 | 0.189085 | 0.782686 | 0.467651 | -0.318451 | 0.793199 | -0.767822 | -0.274819 | -0.338500 | -0.365554 | -0.466868 | 0.526366 | 0.402678 | 0.342570 | 0.342570 | 0.744120 | 0.451979 | 0.678120 | -0.845483 | -0.481173 | -1.056021 | 0.294800 | 0.419513 | 1.581426 | 1.581426 | -0.018409 | 1.675431 | 1.885978 | -0.191873 | 1.584831 | 1.584831 | -0.974055 | 0.0 | 0.425569 | 1.342953 | 1.345771 | 1.347650 | 0.867355 | 0.725303 | 1.231935 | 0.0 | 2.792327 | -1.466299 | 1.319367 | 0.0 | 2.017605 | -0.559623 | 0.456431 | -0.015204 | 0.825626 | 1.216489 | 0.812743 | 0.549925 | 1.013913 | 1.133013 | -0.324912 | 2.006252 | 1.435504 | 0.884850 | 1.716821 | 1.252237 | 1.274876 | 1.097910 | 1.396909 | 0.533293 | 1.230477 | 1.235153 | 1.576179 | 1.977498 | -0.393120 | 0.461845 | 0.952853 | -0.828824 | -0.282956 | 1.153889 | 1.609682 | 1.650187 | ... | 0.373905 | 0.224449 | 0.700431 | -0.890456 | 0.0 | 0.457361 | 1.577157 | 0.614724 | 0.534457 | -0.772382 | 1.296533 | 1.314706 | 1.740068 | 1.765686 | -0.473434 | 5.205693 | 5.257657 | 5.246327 | 1.060684 | -0.740591 | 2.105188 | 1.530591 | 1.082391 | 0.537703 | 0.272655 | 1.054297 | -0.450399 | 0.0 | 0.633197 | 1.829679 | 0.887953 | 0.728076 | -0.853602 | 1.533960 | 1.540772 | 2.092902 | 2.303388 | -0.404378 | 4.705062 | 4.737761 | 4.730574 | 1.332940 | -0.588202 | 2.341679 | 2.130475 | 1.337040 | 0.749555 | 0.316944 | 1.339376 | -0.354867 | 0.0 | 0.773571 | 2.297097 | 1.110466 | 0.842573 | -1.214296 | 1.871263 | 1.824614 | 2.606797 | 2.593685 | -0.407993 | 5.606113 | 5.716608 | 5.684703 | 1.587716 | -0.552448 | 2.884863 | 2.975891 | 1.746133 | 1.096697 | 0.414079 | 1.782651 | -0.359850 | 0.413765 | 1.030214 | 2.495614 | 1.485452 | 0.098314 | -1.555968 | 2.690303 | 2.739924 | 2.759871 | 2.646635 | -0.541207 | 5.611585 | 5.493050 | 5.543491 | 2.214667 | -0.569269 | 3.328373 | -0.535001 | -0.504662 | -0.574435 | 0.548158 | 2.842292 | -0.475511 | 0.629798 | 1.440184 | -1.146353 | 1.900826 | 0.0 | -0.071315 | 1.644608 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 30478 | -0.671374 | 0.255259 | 0.782889 | 0.339382 | 0.531818 | 0.255150 | 1.295149 | -0.241597 | 1.313446 | 0.0 | 0.0 | 0.335827 | -1.483901 | 1.515007 | -0.945628 | -1.338624 | -0.130403 | 1.623708 | -1.348669 | -0.250098 | 1.552875 | 0.323283 | -0.148247 | 1.124855 | 0.326089 | 1.832181 | 1.685920 | 0.0 | 0.265746 | -0.931349 | 1.550957 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.437313 | -0.438222 | -0.436383 | -1.349119 | -1.335979 | -1.360330 | -1.460973 | -1.468277 | -1.445139 | -1.498586 | -1.460280 | -1.507933 | -1.338624 | -1.322969 | -1.352238 | -1.348669 | -1.338617 | -1.354803 | -1.356053 | -1.344436 | -1.365700 | -0.438010 | -0.438158 | -0.438357 | -1.349680 | -1.336207 | -1.360952 | -0.319273 | -0.340464 | -0.924861 | 0.530194 | -0.454359 | -0.449821 | -0.351987 | 0.189085 | 0.782686 | 0.467651 | -0.318451 | 0.793199 | -0.767822 | -0.274819 | -0.338500 | -0.365554 | -0.894191 | -0.408629 | -0.315732 | -0.281484 | -0.281484 | -0.074746 | -0.080142 | 0.410462 | 0.125178 | -0.544857 | 0.604902 | 0.022200 | 1.048473 | -0.105494 | -0.105494 | -0.500349 | -0.138801 | -0.203974 | -0.599768 | 0.096659 | 0.096659 | -0.583196 | 0.0 | 0.101687 | 0.753227 | 0.743037 | 0.754332 | 0.429647 | 0.793468 | -0.339099 | 0.0 | 0.213926 | -0.054341 | -0.012711 | 0.0 | 0.890779 | -0.559623 | 0.891355 | -0.015204 | 1.113917 | 0.889262 | 0.746749 | 0.407849 | 0.477322 | -0.187772 | 0.249785 | 0.712270 | 0.111053 | -0.275717 | -0.653102 | 0.639246 | 0.613893 | 1.258119 | 0.929203 | -0.280762 | 0.877478 | 0.921405 | 0.065145 | 0.737480 | -0.140728 | -0.082848 | -0.338075 | 0.123798 | 0.888668 | 0.522004 | 0.469048 | -0.102337 | ... | 0.373905 | 0.224449 | 0.700431 | 1.111609 | 0.0 | 0.457361 | 1.577157 | 0.614724 | 1.473157 | -0.879385 | 1.296533 | 1.314706 | 1.740068 | 1.765686 | -0.464624 | 0.295729 | 0.538555 | 0.444234 | 1.060684 | 1.504421 | -0.561746 | 1.530591 | -0.987468 | 0.537703 | 0.272655 | -1.001678 | -0.533294 | 0.0 | 0.633197 | 1.829679 | 0.887953 | 1.247721 | -0.844239 | 1.533960 | 1.540772 | -0.836549 | -0.683500 | -0.384394 | -0.069406 | -0.041665 | -0.052826 | 1.332940 | -0.588202 | -0.517802 | -0.542439 | -0.807938 | 0.749555 | 0.316944 | -0.807555 | -0.390294 | 0.0 | 0.773571 | -0.861651 | 1.110466 | 1.339044 | -1.128218 | 1.871263 | 1.824614 | -0.933155 | -0.909670 | -0.387071 | -0.242564 | -0.102492 | -0.157053 | 1.587716 | -0.552448 | -0.479049 | -0.454169 | -0.649666 | 1.096697 | 0.414079 | -0.661712 | -0.359850 | 0.413765 | 1.030214 | -0.866798 | 1.485452 | 1.136091 | -0.682304 | -0.562970 | -0.655584 | -1.140719 | -1.065936 | -0.516717 | -0.147658 | -0.102007 | -0.119959 | -0.667563 | -0.551490 | -0.567129 | -0.508511 | -0.504662 | -0.592727 | 0.548158 | -0.543422 | -0.439027 | 0.629798 | -0.822869 | -0.988632 | -1.271634 | 0.0 | -0.071315 | 1.644608 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 rows × 340 columns
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
train_data.drop("id",axis=1,inplace=True)
train_data_new=train_data.drop(['price_doc'],axis=1)
y_train=train_data[['price_doc']]
feature_sel_model = SelectFromModel(Lasso(alpha=0.005, random_state=123)) # remember to set the seed, the random state in this function
feature_sel_model.fit(train_data_new, y_train)
SelectFromModel(estimator=Lasso(alpha=0.005, copy_X=True, fit_intercept=True,
max_iter=1000, normalize=False, positive=False,
precompute=False, random_state=123,
selection='cyclic', tol=0.0001,
warm_start=False),
max_features=None, norm_order=1, prefit=False, threshold=None)
selected_feat = train_data_new.columns[(feature_sel_model.get_support())]
# let's print some stats
print('total features: {}'.format((train_data_new.shape[1])))
print('selected features: {}'.format(len(selected_feat)))
print('features with coefficients shrank to zero: {}'.format(np.sum(feature_sel_model.estimator_.coef_ == 0)))
total features: 343 selected features: 120 features with coefficients shrank to zero: 223
selected_feat
Index(['full_sq', 'life_sq', 'max_floor', 'material', 'build_year', 'num_room',
'kitch_sq', 'state', 'sub_area', 'preschool_quota',
...
'cafe_sum_5000_max_price_avg', 'cafe_count_5000_price_500',
'cafe_count_5000_price_1000', 'cafe_count_5000_price_1500',
'cafe_count_5000_price_2500', 'cafe_count_5000_price_4000',
'cafe_count_5000_price_high', 'leisure_count_5000', 'sport_count_5000',
'year'],
dtype='object', length=120)
train_data_new=train_data_new[selected_feat]
test_data.to_csv("test123.csv")
test_data_new=pd.read_csv("test123.csv",usecols=selected_feat)
test_data_new.shape
(7662, 120)
train_data_new.shape
(30471, 120)
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(train_data_new,y_train,test_size=0.3,random_state=123)
xtrain.shape,xtest.shape,ytrain.shape,ytest.shape
((21329, 120), (9142, 120), (21329, 1), (9142, 1))
ytrain=np.array(ytrain)
ytest = np.array(ytest)
def model_builder(model):
m = model
m.fit(xtrain,ytrain) # log(y) follows normal distribution
train_pred =np.exp(m.predict(xtrain))
test_pred = np.exp(m.predict(xtest))
print('========Train=======')
print('RMSE :' ,np.sqrt(mean_squared_error(ytrain,train_pred)))
print('MAPE :' , np.mean(np.abs((ytrain-train_pred)/ytrain))*100)
print('========Test=======')
print('RMSE :' ,np.sqrt(mean_squared_error(ytest,test_pred)))
print('MAPE :' , np.mean(np.abs((ytest-test_pred)/ytest))*100)
return m
train_data_new.fillna(0,inplace=True)
np.where(train_data_new.values >= np.finfo(np.float64).max)
(array([], dtype=int64), array([], dtype=int64))
lr=model_builder(LinearRegression())
========Train======= RMSE : 1.9566156934714765 MAPE : 1173.0115037051728 ========Test======= RMSE : 2.168163042526417 MAPE : 892.5329576393402
xg = model_builder(XGBRegressor())
========Train======= RMSE : 2.4013148581991 MAPE : 1673.4218221945025 ========Test======= RMSE : 2.185605323077104 MAPE : 1338.9667586534404
xg = model_builder(XGBRegressor())
========Train======= RMSE : 2.4013148581991 MAPE : 1673.4218221945025 ========Test======= RMSE : 2.185605323077104 MAPE : 1338.9667586534404
params ={'max_depth':range(1,15), 'min_samples_split':range(10,70,10)}
dt = model_builder(GridSearchCV(DecisionTreeRegressor( ) , param_grid=params , cv=2 , n_jobs=-1))
========Train======= RMSE : 1.7590339520454807 MAPE : 1473.3109878439675 ========Test======= RMSE : 1.8057512588993503 MAPE : 1238.9236506498446
params = {'n_estimators':range(50,200,50),'learning_rate':[0.5,0.7,1.0]}
ad = model_builder(GridSearchCV(AdaBoostRegressor() , param_grid=params , cv=2 ,n_jobs=-1))
========Train======= RMSE : 1.0519314655659922 MAPE : 645.4961496920954 ========Test======= RMSE : 1.081722131068907 MAPE : 544.9429240394156
gb = model_builder(GradientBoostingRegressor(n_estimators=50 , learning_rate=0.1))
========Train======= RMSE : 1.602944277401274 MAPE : 1432.9728593461186 ========Test======= RMSE : 1.626682221055294 MAPE : 1198.0175654116974
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
clf=model_builder(Ridge())
========Train======= RMSE : 1.9563973670194497 MAPE : 1172.9959597401967 ========Test======= RMSE : 2.167822260676223 MAPE : 892.5289066706323
clf2=model_builder(Lasso())
========Train======= RMSE : 1.4105768207813816 MAPE : 1169.3184292169735 ========Test======= RMSE : 1.43518690681185 MAPE : 983.7045381218818
rf123=model_builder(RandomForestRegressor())
========Train======= RMSE : 2.1293767687410385 MAPE : 1672.4426218161948 ========Test======= RMSE : 1.845423825621202 MAPE : 1265.5462614173314
sub=pd.read_csv("sample_submission.csv/sample_submission.csv")
sub.head()
| id | price_doc | |
|---|---|---|
| 0 | 30474 | 0.383503 |
| 1 | 30475 | 1.256324 |
| 2 | 30476 | 0.514588 |
| 3 | 30477 | 0.596932 |
| 4 | 30478 | 0.491853 |
test_prediction = np.exp(rf123.predict(test_data_new))
sub['price_doc']=test_prediction
sub.to_csv('final_submission_rf34.csv',index=False)